From e2e4119923075975b0174a50e500f84b6ebb0707 Mon Sep 17 00:00:00 2001
From: Anastasios Zouzias
Date: Sun, 4 Aug 2019 15:28:12 +0200
Subject: [PATCH] [sliq] presorted vectors

---
 CMakeLists.txt                 |   4 +-
 docker-compose.yml             |  10 +
 docker/Dockerfile              |   3 +
 examples/test-boston.py        |   2 +-
 examples/test-flight.py        |   2 +-
 src/CMakeLists.txt             |   4 +-
 src/GBT.h                      |  23 +-
 src/dataset.h                  |  87 ++------
 src/metrics/logloss.h          |   9 +-
 src/metrics/rmse.h             |   2 +-
 src/trees/class_list.h         |  58 +++++
 src/trees/split_info.h         |  92 --------
 src/trees/tree.h               | 210 ++++++++++++++++--
 src/trees/tree_builder_state.h |  42 ++++
 src/trees/treenode.h           | 375 ++++++++++++++++++++++++---------
 src/utils.h                    |  30 ++-
 test/CMakeLists.txt            |   2 +-
 test/test_dataset.cpp          |  19 --
 test/test_split_info.cpp       |   9 -
 test/test_treenode.cpp         |   2 +-
 test/test_utils.cpp            |  17 ++
 21 files changed, 671 insertions(+), 331 deletions(-)
 create mode 100644 docker-compose.yml
 create mode 100644 docker/Dockerfile
 create mode 100644 src/trees/class_list.h
 delete mode 100644 src/trees/split_info.h
 create mode 100644 src/trees/tree_builder_state.h
 delete mode 100644 test/test_split_info.cpp
 create mode 100644 test/test_utils.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b6d5e0..7e68858 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,7 +68,5 @@ include_directories( ${EIGEN3_INCLUDE_DIRS} )
 add_subdirectory(pybind11)
 pybind11_add_module(microgbtpy src/metrics/metric.h src/trees/tree.h
         src/GBT.h src/dataset.h src/metrics/logloss.h
-        src/trees/treenode.h src/trees/split_info.h
-        src/utils.h
-        src/python_api.cpp)
+        src/trees/treenode.h src/python_api.cpp)
 #########################
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..37f11d1
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,10 @@
+version: '3'
+
+services:
+  microgbt:
+    image: microgbt:0.0.1
+    build:
+      dockerfile: docker/Dockerfile
+      context: ./
+    volumes:
+      - .:/home/ubuntu
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..7119ab6
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,3 @@
+FROM zouzias/boost:1.71.0
+
+RUN apt-get -yq update && apt-get -yq install vim cmake python3-pip libeigen3-dev
diff --git a/examples/test-boston.py b/examples/test-boston.py
index 402e2ed..9b92e39 100755
--- a/examples/test-boston.py
+++ b/examples/test-boston.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import microgbtpy
 from math import sqrt
 import logging.config
diff --git a/examples/test-flight.py b/examples/test-flight.py
index ecdaa2a..c58da56 100755
--- a/examples/test-flight.py
+++ b/examples/test-flight.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import microgbtpy
 import logging.config
 import numpy as np
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e6e131f..5a3ddf4 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,6 +1,6 @@
-add_library(microgbt "" metrics/metric.h trees/tree.h trees/split_info.h
+add_library(microgbt "" metrics/metric.h trees/tree.h
         GBT.h dataset.h metrics/logloss.h trees/treenode.h metrics/rmse.h
-        utils.h trees/numerical_splliter.h trees/splitter.h)
+        utils.h trees/class_list.h trees/tree_builder_state.h)
 
 
 target_sources(
diff --git a/src/GBT.h b/src/GBT.h
index b7f3363..b8af918 100644
--- a/src/GBT.h
+++ b/src/GBT.h
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include
 #include "dataset.h"
 #include "trees/tree.h"
 #include "metrics/metric.h"
@@ -28,17 +29,16 @@ namespace microgbt {
 
         /**
          * Return a single decision tree given training data, gradient, Hessian and shrinkage rate
          *
-         * @param trainSet
+         * @param dataset
          * @param gradient
         * @param hessian
         * @param shrinkageRate
         */
-        Tree buildTree(const Dataset &trainSet, const Vector& previousPreds, const Vector &gradient,
-                       const Vector &hessian, double shrinkageRate) const {
-
+        Tree buildTree(const Dataset &dataset, const Vector &gradient, const Vector &hessian,
+                       double shrinkageRate) const {
             Tree tree = Tree(_lambda, _minSplitGain, _minTreeSize, _maxDepth);
-            tree.build(trainSet, previousPreds, gradient, hessian, shrinkageRate);
+            tree.build(dataset, gradient, hessian, shrinkageRate);
             return tree;
         }
@@ -128,16 +128,21 @@
                 auto startTimestamp = std::chrono::high_resolution_clock::now();
 
                 // Current predictions
-                Vector scores = predictDataset(trainSet);
+                Vector predictions = predictDataset(trainSet);
 
                 // Compute gradient and Hessian with respect to prior predictions
                 std::cout << "[Computing gradients/Hessians vectors]" << std::endl;
-                Vector gradient = _metric->gradients(scores, trainSet.y());
-                Vector hessian = _metric->hessian(scores);
+                Vector gradient = _metric->gradients(predictions, trainSet.y());
+                Vector hessian = _metric->hessian(predictions);
+
+                std::cout << "Predictions" << std::endl;
+                for (size_t i = 0; i < gradient.size(); i++){
+                    std::cout << predictions[i] << " - " << trainSet.y()[i] << " / ";
+                }
 
                 // Grow a new tree learner
                 std::cout << "[Building next tree...]" << std::endl;
-                Tree tree = buildTree(trainSet, scores, gradient, hessian, learningRate);
+                Tree tree = buildTree(trainSet, gradient, hessian, learningRate);
                 std::cout << "[Tree is built successfully]" << std::endl;
 
                 // Update the learning rate
diff --git a/src/dataset.h b/src/dataset.h
index 3cb5c10..bcb5d8b 100644
--- a/src/dataset.h
+++ b/src/dataset.h
@@ -4,14 +4,14 @@
 #include
 #include
 #include
-#include "trees/split_info.h"
+#include "utils.h"
 
 namespace microgbt {
 
     using Vector = std::vector<double>;
     using VectorT = std::vector<size_t>;
     using MatrixType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
-    using SortedMatrixType = Eigen::Matrix<int, Eigen::Dynamic, Eigen::Dynamic>;
+    using SortedMatrixType = std::vector<Permutation>;
 
     /**
      * Represents a machine learning "design matrix" and target vector, (X, y)
      */
     class Dataset {
 
         SortedMatrixType _sortedMatrixIdx;
 
-        VectorT _rowIndices;
-
         /**
          * Return sorted indices from an Eigen vector
          * @param v
          * @return
         */
-        Eigen::VectorXi sortIndices(long colIndex) const{
+        VectorT sortIndices(long colIndex) const{
 
             // initialize original index locations
             Eigen::VectorXd v = col(colIndex);
             unsigned int n = v.size();
-            Eigen::VectorXi idx(n);
+            VectorT idx(n);
             // idx contains now 0,1,...,v.size() - 1
-            std::iota(idx.data(), idx.data() + idx.size(), 0);
+            std::iota(idx.begin(), idx.end(), 0);
 
             // sort indexes based on comparing values in v
-            std::sort(idx.data(), idx.data() + idx.size(),
+            std::sort(idx.begin(), idx.end(),
                       [&v](int i1, int i2) {return v[i1] < v[i2];});
 
             return idx;
         }
 
     public:
 
         Dataset() = default;
 
         Dataset(const MatrixType& X, const Vector &y):
-            _sortedMatrixIdx(X.rows(), X.cols()),
-            _rowIndices(y.size()){
+            _sortedMatrixIdx(X.cols()) {
 
             _X = std::make_shared<MatrixType>(X);
             _y = std::make_shared<Vector>(y);
 
-            // By default, all rows are included in the dataset
-            std::iota(_rowIndices.begin(), _rowIndices.end(), 0);
-
             for ( long j = 0; j < X.cols(); j++) {
-                _sortedMatrixIdx.col(j) = sortIndices(j);
+                _sortedMatrixIdx[j] = Permutation(sortIndices(j));
             }
         }
 
         Dataset(Dataset const &dataset) = default;
 
-        /**
-         * Construct a Dataset, given a binary split gain and left/right side parameter
-         * @param dataset
-         * @param bestGain
-         * @param side
-         */
-        Dataset(Dataset const &dataset, const SplitInfo &bestGain, SplitInfo::Side side):
-            _X(dataset.X()),_y(dataset.yptr()) {
-
-            _X = dataset.X();
-            _y = dataset.yptr();
-
-            VectorT localIds;
-            if (side == SplitInfo::Side::Left) {
-                localIds = bestGain.getLeftLocalIds();
-            } else {
-                localIds = bestGain.getRightLocalIds();
-            }
-
-            _rowIndices = VectorT(localIds.size());
-            VectorT otherRowIndices = dataset.rowIter();
-            for (size_t i = 0 ; i < localIds.size(); i++) {
-                _rowIndices[i] = otherRowIndices[localIds[i]];
-            }
-
-            int rows = _rowIndices.size(), cols = dataset.numFeatures();
-
-            _sortedMatrixIdx = SortedMatrixType(rows, cols);
-
-            for ( long j = 0; j < cols; j++) {
-                _sortedMatrixIdx.col(j) = sortIndices(j);
-            }
-        }
-
         inline size_t nRows() const {
-            return this->_rowIndices.size();
-        }
-
-        inline VectorT rowIter() const {
-            return _rowIndices;
+            return _X->rows();
         }
 
-        inline size_t numFeatures() const {
+        inline long numFeatures() const {
             return this->_X->cols();
         }
 
         inline std::shared_ptr<MatrixType> X() const {
             return _X;
         }
 
-        inline std::shared_ptr<Vector> yptr() const {
-            return _y;
-        }
-
         inline Vector y() const {
-            Vector proj(_rowIndices.size());
-            for (size_t i = 0; i < proj.size(); i++) {
-                proj[i] = (*_y)[_rowIndices[i]];
-            }
-            return proj;
+            return *_y;
         }
 
         inline Eigen::RowVectorXd row(long rowIndex) const {
-            return _X->row(_rowIndices[rowIndex]);
+            return _X->row(rowIndex);
         }
 
         inline Eigen::RowVectorXd col(long colIndex) const {
-            Eigen::RowVectorXd column(_rowIndices.size());
-            auto fullColumn = _X->col(colIndex);
-            for (size_t i = 0; i < _rowIndices.size(); i++) {
-                column[i] = fullColumn[_rowIndices[i]];
-            }
-            return column;
+            return _X->col(colIndex);
         }
 
         /**
          * @param colIndex Index of column
          * @return
          */
-        inline Eigen::RowVectorXi sortedColumnIndices(long colIndex) const {
-            return _sortedMatrixIdx.col(colIndex);
+        inline Permutation sortedColumnIndices(long colIndex) const {
+            return _sortedMatrixIdx[colIndex];
         }
     };
 }
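
Note on the change above: instead of re-sorting rows at every tree node, the Dataset now computes each feature's sort order once, up front, and wraps it in a Permutation (defined in src/utils.h further below) — the "pre-sorting" idea from SLIQ. A minimal, self-contained sketch of the argsort idiom used by sortIndices; the names here are illustrative, not the patch's code:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Illustrative standalone argsort: returns the indices that sort v ascending.
    std::vector<size_t> argsort(const std::vector<double> &v) {
        std::vector<size_t> idx(v.size());
        std::iota(idx.begin(), idx.end(), 0);          // 0, 1, ..., n-1
        std::sort(idx.begin(), idx.end(),
                  [&v](size_t i, size_t j) { return v[i] < v[j]; });
        return idx;
    }

    int main() {
        std::vector<double> feature{3.5, 1.2, 2.7};
        for (size_t i : argsort(feature))
            std::cout << i << " ";                     // prints: 1 2 0
        std::cout << std::endl;
        return 0;
    }

Computing this once per feature replaces the old per-node O(n log n) re-sort with a single up-front cost.
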
diff --git a/src/metrics/logloss.h b/src/metrics/logloss.h
index 9e84c3d..f9e13ee 100644
--- a/src/metrics/logloss.h
+++ b/src/metrics/logloss.h
@@ -12,7 +12,7 @@ namespace microgbt {
     /**
      * Log loss metric
      *
-     * Logistic loss: y_i ln(1 + exp(-pred_i)) + (1-y_i) ln( 1 + exp(pred_i))
+     * Logistic loss (negative log-likelihood): y_i ln(1 + exp(-pred_i)) + (1 - y_i) ln(1 + exp(pred_i))
      */
     class LogLoss : public Metric {
 
@@ -20,7 +20,6 @@
     private:
         // Numerical tolerance on boundary of log(x) and log(1-x) function in range [0,1]
         double _eps;
-
     public:
 
         LogLoss() {
@@ -34,10 +33,10 @@
          * @return
          */
         inline double clip(double value) const {
-            if ( value > 1 - _eps )
+            if (value > 1 - _eps)
                 return 1 - _eps;
 
-            if ( value < _eps)
+            if (value < _eps)
                 return _eps;
 
             return value;
@@ -85,4 +84,4 @@
         }
     };
-}
\ No newline at end of file
+}
diff --git a/src/metrics/rmse.h b/src/metrics/rmse.h
index 5fef489..3858ce8 100644
--- a/src/metrics/rmse.h
+++ b/src/metrics/rmse.h
@@ -44,4 +44,4 @@ namespace microgbt {
 
         }
     };
-} // namespace microgbt
\ No newline at end of file
+} // namespace microgbt
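
For reference, the gradient and Hessian that LogLoss feeds to the tree builder (defined outside this hunk) follow from the loss above: with p = sigmoid(pred), the per-sample gradient is p - y and the Hessian is p * (1 - p). A small standalone check of those formulas — illustrative code, not the patch's:

    #include <cmath>
    #include <iostream>

    double sigmoid(double pred) { return 1.0 / (1.0 + std::exp(-pred)); }

    int main() {
        double pred = 0.3, y = 1.0;
        double p = sigmoid(pred);
        std::cout << "gradient = " << p - y << std::endl;         // negative: pushes prediction up
        std::cout << "hessian  = " << p * (1.0 - p) << std::endl; // always in (0, 0.25]
        return 0;
    }
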
diff --git a/src/trees/class_list.h b/src/trees/class_list.h
new file mode 100644
index 0000000..c313a24
--- /dev/null
+++ b/src/trees/class_list.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include
+
+namespace microgbt {
+
+    using NodeId = long;
+
+    /**
+     * ClassList (SLIQ): tracks, for each sample, the id of the tree node
+     * that the sample is currently assigned to
+     */
+    class ClassList {
+
+        Vector _gradients;
+        Vector _hessians;
+        std::vector<NodeId> _nodeIds;
+
+        // Node index to set of left subtree candidate samples
+        std::map<NodeId, std::set<long>> _leftCandidateSamples;
+
+    public:
+
+        explicit ClassList(const Vector &gradients, const Vector &hessians):
+            _gradients(gradients), _hessians(hessians),
+            _nodeIds(gradients.size()){
+            std::fill(_nodeIds.begin(), _nodeIds.end(), 0);
+        }
+
+        void clean() {
+            _leftCandidateSamples.clear();
+        }
+
+        NodeId nodeAt(long index) const {
+            return _nodeIds[index];
+        }
+
+        void appendSampleToLeftSubTree(NodeId nodeId, long index) {
+            _leftCandidateSamples[nodeId].insert(index);
+        }
+
+        void updateNodeId(long sampleIndex, NodeId newNodeId) {
+            _nodeIds[sampleIndex] = newNodeId;
+        }
+
+        std::set<long> getLeft(NodeId nodeId) {
+            return _leftCandidateSamples[nodeId];
+        }
+
+        long getLeftSize(NodeId nodeId) {
+            return _leftCandidateSamples[nodeId].size();
+        }
+
+        long getRightSize(NodeId nodeId) {
+            return (long)_gradients.size() - (long)_leftCandidateSamples[nodeId].size();
+        }
+
+    };
+} // namespace microgbt
\ No newline at end of file
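
A short usage sketch of the new ClassList, using only the methods defined above; it assumes src/ is on the include path and that dataset.h (which defines microgbt::Vector) is available:

    #include <cassert>
    #include "dataset.h"
    #include "trees/class_list.h"

    void classListSketch() {
        microgbt::Vector gradients{0.1, -0.2, 0.3};
        microgbt::Vector hessians{1.0, 1.0, 1.0};
        microgbt::ClassList classList(gradients, hessians);

        assert(classList.nodeAt(1) == 0);            // every sample starts at the root (node 0)
        classList.appendSampleToLeftSubTree(0, 1);   // stage sample 1 left of node 0
        assert(classList.getLeftSize(0) == 1);
        assert(classList.getRightSize(0) == 2);

        classList.updateNodeId(1, 7);                // after the split, sample 1 belongs to node 7
        assert(classList.nodeAt(1) == 7);

        classList.clean();                           // reset candidate sets before the next feature scan
    }
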
diff --git a/src/trees/split_info.h b/src/trees/split_info.h
deleted file mode 100644
index 18f0ea6..0000000
--- a/src/trees/split_info.h
+++ /dev/null
@@ -1,92 +0,0 @@
-#pragma once
-#include
-#include
-#include
-
-
-namespace microgbt {
-
-    using VectorD = std::vector<double>;
-    using VectorT = std::vector<size_t>;
-
-    /**
-     * SplitInfo contains information of a binary tree split such as
-     * gain value, split numeric value on which best split gain is attained.
-     */
-    class SplitInfo {
-
-    private:
-
-        Eigen::RowVectorXi _sortedFeatureIndices;
-
-        /* Best gain and value (!?) */
-        double _bestGain = std::numeric_limits<double>::min(), _bestSplitNumericValue = 0.0;
-
-        size_t _bestSortedIndex = -1, _bestFeatureId = -1;
-
-    public:
-
-        enum Side{
-            Left,
-            Right
-        };
-
-        explicit SplitInfo() = default;
-
-        SplitInfo(double gain, double bestSplitNumericValue) {
-            _bestGain = gain;
-            _bestSplitNumericValue = bestSplitNumericValue;
-        }
-
-        SplitInfo(Eigen::RowVectorXi sortedFeatureIndices, double gain, double bestSplitNumericValue, size_t bestSortedIdx):
-            _sortedFeatureIndices(std::move(sortedFeatureIndices)) {
-            _bestGain = gain;
-            _bestSplitNumericValue = bestSplitNumericValue;
-            _bestSortedIndex = bestSortedIdx;
-        }
-
-        bool operator < (const SplitInfo& rhs) const { return this->_bestGain <= rhs.bestGain(); }
-
-        inline double bestGain() const {
-            return _bestGain;
-        }
-
-        inline double splitValue() const {
-            return _bestSplitNumericValue;
-        }
-
-        void setBestFeatureId(size_t bestFeatureId) {
-            _bestFeatureId = bestFeatureId;
-        }
-
-        inline size_t getBestFeatureId() const {
-            return _bestFeatureId;
-        }
-
-        VectorT getLeftLocalIds() const {
-            return VectorT(_sortedFeatureIndices.data(),
-                           _sortedFeatureIndices.data() + _bestSortedIndex);
-        }
-
-        VectorT getRightLocalIds() const {
-            return VectorT(_sortedFeatureIndices.data() + _bestSortedIndex,
-                           _sortedFeatureIndices.data() + _sortedFeatureIndices.size());
-        }
-
-        VectorD split(const VectorD &vector, const SplitInfo::Side &side) const {
-            VectorT rowIndices;
-            if (side == SplitInfo::Side::Left)
-                rowIndices = getLeftLocalIds();
-            else
-                rowIndices = getRightLocalIds();
-
-            VectorD splitVector;
-            std::transform(rowIndices.begin(), rowIndices.end(),
-                           std::back_inserter(splitVector),
-                           [&vector](size_t rowIndex){
-                               return vector[rowIndex];
-                           });
-
-            return splitVector;
-        }
-    };
-} // namespace microgbt
diff --git a/src/trees/tree.h b/src/trees/tree.h
index e352638..4bfdcf3 100644
--- a/src/trees/tree.h
+++ b/src/trees/tree.h
@@ -3,58 +3,219 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include "treenode.h"
-#include "split_info.h"
 #include "../metrics/metric.h"
+#include "class_list.h"
+#include "tree_builder_state.h"
 
 namespace microgbt {
 
+    using FeatureId = long;
+    using NodeId = long;
+    using GradientHessianPair = std::pair<double, double>;
+
+
     /**
-     * A Decision Tree with binary splits
+     * A binary decision tree that carries the gradient boosting statistics (gradients/Hessians) a la xgboost
      */
     class Tree {
 
-    private:
+        /** Maximum depth of tree */
         int _maxDepth;
 
+        long _minTreeSize;
+
         /** Gradient boosting parameters */
-        double _lambda, _minSplitGain, _minTreeSize;
+        double _lambda, _minSplitGain;
 
         /** Root of tree */
         std::shared_ptr<TreeNode> root;
 
+        std::vector<std::shared_ptr<TreeNode>> nodes;
+
+        /**
+         * Returns the pre-sorted sample indices (as a Permutation) for a given feature index 'featureId'.
+         *
+         * The sort order is computed once, in the Dataset constructor, and reused at every tree level.
+         *
+         * @param dataset Input design matrix and targets as Dataset
+         * @param featureId Feature / column of dataset
+         */
+        static Permutation sortSamplesByFeature(const Dataset &dataset,
+                                                int featureId) {
+
+            return dataset.sortedColumnIndices(featureId);
+        }
+
     public:
 
-        Tree(double lambda, double minSplitGain, double minTreeSize, int maxDepth) {
+        Tree(double lambda, double minSplitGain, size_t minTreeSize, int maxDepth):
+            nodes(0) {
             _lambda = lambda;
             _minSplitGain = minSplitGain;
             _maxDepth = maxDepth;
             _minTreeSize = minTreeSize;
         }
 
+        std::shared_ptr<TreeNode> newTreeNode(size_t size) {
+            long nodeId = nodes.size();
+            TreeNode node(nodeId, _lambda, size);
+            node.makeLeaf();
+            nodes.push_back(std::make_shared<TreeNode>(node));
+            return nodes[nodeId];
+        }
+
         /**
-         * Recursively (and greedily) build decision tree using 'optimal' binary splits
-         * based on gradient & Hessian vectors.
+         * Build a tree using 'optimal' binary splits
+         * based on gradient & Hessian vectors (following xgboost's formulation).
+         *
+         * The tree building approach is described in SLIQ: A fast scalable classifier for data mining
+         * (https://doi.org/10.1007/BFb0014141)
          *
-         * @param train_set Training dataset
-         * @param previousPreds Previous iteration predictions
-         * @param gradient Vector of gradients
-         * @param hessian Vector of second derivatives, Hessian
-         * @param shrinkage Shrinkage rate
+         * @param dataset Training dataset
+         * @param gradient Vector of gradients to fit on
+         * @param hessian Vector of second derivatives (Hessians) to fit on
+         * @param shrinkage Shrinkage additive boosting rate
          */
-        void build(const Dataset &trainSet, const Vector &previousPreds,
+        void build(const Dataset &dataset,
                    const Vector &gradient,
                    const Vector &hessian,
                    double shrinkage) {
-            this->root = std::unique_ptr<TreeNode>(new TreeNode(_lambda, _minSplitGain, _minTreeSize, _maxDepth));
-            int depth = 0;
-            this->root->build(trainSet, previousPreds, gradient, hessian, shrinkage, depth);
+            long nSamples = dataset.nRows();
+            // Create the root node
+            root = newTreeNode(nSamples);
+
+            // SLIQ class list
+            ClassList classList(gradient, hessian);
+
+            // Sum the gradients / Hessians over all samples
+            double G = microgbt::sum(gradient);
+            double H = microgbt::sum(hessian);
+
+            // Root node setup: gradient/Hessian/weight
+            root->setLambda(_lambda);
+            root->setGradientSum(G);
+            root->setHessianSum(H);
+            root->setWeight(root->calc_leaf_weight());
+
+            // Build tree breadth first search (BFS) as in SLIQ,
+            // that is, go over each depth level of the tree
+            for (int depth = 0; depth < _maxDepth; depth++) {
+                std::cout << "[Working on depth '" << depth << "']" << std::endl;
+
+                // Go over all features to compute the optimal gain
+                for (FeatureId featureIdx = 0; featureIdx < dataset.numFeatures(); featureIdx++) {
+                    std::cout << "[Working on feature '" << featureIdx << "' out of " << dataset.numFeatures() << "]" << std::endl;
+
+                    // Instantiate the tree builder state (SLIQ)
+                    TreeBuilderState state(gradient, hessian);
+
+                    // Clean the list of candidate left indices per leaf node
+                    classList.clean();
+
+                    Permutation perm = sortSamplesByFeature(dataset, featureIdx);
+                    const Eigen::RowVectorXd featureValues = dataset.col(featureIdx);
+
+                    // Go over all pre-sorted sample indices: 'sampleIdx'
+                    for (NodeId i = 0; i < nSamples; i++) {
+                        size_t sampleIdx = perm(i);
+                        double sortedFeatureValue = featureValues[sampleIdx];
+                        double g = gradient[sampleIdx];
+                        double h = hessian[sampleIdx];
+                        NodeId leafId = classList.nodeAt(sampleIdx);
+
+                        // Calculate the gain of a possible split on leaf 'leafId';
+                        // if it improves on the best gain seen so far at 'leafId', mark it
+
+                        // Add gradient and Hessian to partial sums
+                        state.addToPartialSums(leafId, g, h);
+                        GradientHessianPair partialSums = state.partialSums(leafId);
+                        double gain = nodes[leafId]->calc_split_gain(partialSums.first, partialSums.second);
+
+                        // Assign sample with index sampleIdx to class list
+                        classList.appendSampleToLeftSubTree(leafId, sampleIdx);
+
+                        // If gain is better, mark it
+                        if (gain > nodes[leafId]->bestGain()
+                            && gain > 0
+                            && classList.getLeftSize(leafId) > _minTreeSize
+                            && classList.getRightSize(leafId) > _minTreeSize
+                            ) {
+                            nodes[leafId]->updateWeight(shrinkage);
+                            nodes[leafId]->setLeftSampleIds(classList.getLeft(leafId));
+                            nodes[leafId]->setBestSplitValue(sortedFeatureValue);
+                            nodes[leafId]->setBestGain(gain);
+                            nodes[leafId]->setBestSplitFeatureId(featureIdx);
+                            nodes[leafId]->setLeftGradientSum(partialSums.first);
+                            nodes[leafId]->setLeftHessianSum(partialSums.second);
+                        }
+                    }
+                }
+
+                // At this point, leaf nodes contain all information needed to decide
+                // whether a split will be performed or not
+
+                // Go over all leaves, say 'l', and
+                // create the left and right sub-trees based on the leaf info
+                size_t n = nodes.size();
+                for (size_t i = 0; i < n; i++) {
+
+                    if (nodes[i]->getSize() < _minTreeSize){
+                        nodes[i]->updateWeight(shrinkage);
+                        continue;
+                    }
+
+                    if (nodes[i]->getLeftSize() < _minTreeSize){
+                        nodes[i]->updateWeight(shrinkage);
+                        continue;
+                    }
+
+                    if (nodes[i]->getRightSize() < _minTreeSize) {
+                        nodes[i]->updateWeight(shrinkage);
+                        continue;
+                    }
+
+                    if (nodes[i]->bestGain() < _minSplitGain){
+                        nodes[i]->updateWeight(shrinkage);
+                        continue;
+                    }
+
+                    if (nodes[i]->isLeaf()) {
+                        nodes[i]->setLeftSubTree(newTreeNode(nodes[i]->getLeftSize()), shrinkage);
+                        nodes[i]->setRightSubTree(newTreeNode(nodes[i]->getRightSize()), shrinkage);
+                        nodes[i]->markInnerNode();
+                    }
+                }
+
+                // Go over all samples and update each one's node id:
+                // move it to the left or right child of its previous leaf
+                for (long i = 0; i < nSamples; i++) {
+                    NodeId leafId = classList.nodeAt(i);
+                    if (!nodes[leafId]->isLeaf()) {
+                        if (nodes[leafId]->isLeftAssigned(i)) {
+                            classList.updateNodeId(i, nodes[leafId]->getLeftSubTreeId());
+                        } else {
+                            classList.updateNodeId(i, nodes[leafId]->getRightSubTreeId());
+                        }
+                    }
+                }
+            }
+
+            // FIXME: remove those
+            // std::string output = toDigraph();
+            // stdout the tree structure
+            // std::cout << output;
         }
 
         /**
          * Return the score for a given sample, i.e. set of features
          *
          * @param sample
         * @return
         */
         double score(const Eigen::RowVectorXd &sample) const {
             return root->score(sample);
         }
+
+        /**
+         * Returns the tree represented as a directed graph using dot notation
+         *
+         * @return
+         */
+        std::string toDigraph() const {
+            std::stringstream ss;
+            ss << "digraph {" << std::endl;
+            for (auto& line: root->toDigraph()) {
+                ss << "\t" << line << ";" << std::endl;
+            }
+            ss << "}" << std::endl;
+            return ss.str();
+        }
     };
-} // namespace microgbt
\ No newline at end of file
+} // namespace microgbt
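
To summarize the scan in build() above: for one feature, samples are visited in presorted order while per-leaf prefix sums (G_l, H_l) accumulate, and every prefix is a candidate split. A stripped-down, single-node version of that scan — a hypothetical helper, not the patch's code; the real loop additionally routes samples through ClassList and TreeBuilderState:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Returns (best gain, split threshold) for one node and one feature.
    // 'order' is the presorted sample order for this feature; G/H are node totals.
    std::pair<double, double> bestSplit(const std::vector<double> &featureValues,
                                        const std::vector<size_t> &order,
                                        const std::vector<double> &gradient,
                                        const std::vector<double> &hessian,
                                        double G, double H, double lambda) {
        auto objective = [lambda](double g, double h) { return g * g / (h + lambda); };
        double Gl = 0, Hl = 0, bestGain = 0, bestValue = 0;
        for (size_t i : order) {
            Gl += gradient[i];
            Hl += hessian[i];
            // Gain of splitting here: left prefix vs. remaining right suffix (Eq 7 of xgboost)
            double gain = (objective(Gl, Hl) + objective(G - Gl, H - Hl) - objective(G, H)) / 2.0;
            if (gain > bestGain) {
                bestGain = gain;
                bestValue = featureValues[i];  // candidate split threshold
            }
        }
        return {bestGain, bestValue};
    }
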
diff --git a/src/trees/tree_builder_state.h b/src/trees/tree_builder_state.h
new file mode 100644
index 0000000..31551b6
--- /dev/null
+++ b/src/trees/tree_builder_state.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include
+#include
+
+namespace microgbt {
+
+    using NodeId = long;
+    using Vector = std::vector<double>;
+    using GradientHessianPair = std::pair<double, double>;
+
+    /**
+     * Maintains state (partial gradient / Hessian sums) over the tree building process
+     */
+    class TreeBuilderState {
+
+        std::shared_ptr<Vector> _gradient;
+        std::shared_ptr<Vector> _hessian;
+        std::map<NodeId, GradientHessianPair> _partialSums;
+
+    public:
+        TreeBuilderState(const Vector &gradient, const Vector &hessian) :
+                _gradient(std::make_shared<Vector>(gradient)),
+                _hessian(std::make_shared<Vector>(hessian)),
+                _partialSums() {
+        }
+
+        // Get partial gradient/Hessian sums per tree node
+        GradientHessianPair partialSums(NodeId nodeId) const {
+            if (_partialSums.find(nodeId) == _partialSums.end()){
+                return GradientHessianPair(0.0, 0.0);
+            }
+            else {
+                return _partialSums.at(nodeId);
+            }
+        }
+
+        void addToPartialSums(NodeId nodeId, double g, double h) {
+            GradientHessianPair sums = partialSums(nodeId);
+            _partialSums[nodeId] = GradientHessianPair(sums.first + g, sums.second + h);
+        }
+    };
+}
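
A sketch of how the builder exercises TreeBuilderState during the scan (assumes src/ is on the include path; the values are made up):

    #include <cassert>
    #include "trees/tree_builder_state.h"

    void stateSketch() {
        microgbt::Vector gradient{0.1, -0.2, 0.3};
        microgbt::Vector hessian{1.0, 1.0, 1.0};
        microgbt::TreeBuilderState state(gradient, hessian);

        assert(state.partialSums(0).first == 0.0);          // unseen node ids start at (0, 0)

        // Visiting samples 0 and 1 in sorted order accumulates the left-side sums
        state.addToPartialSums(0, gradient[0], hessian[0]);
        state.addToPartialSums(0, gradient[1], hessian[1]);
        assert(state.partialSums(0).second == 2.0);         // H_l after two samples
    }
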
diff --git a/src/trees/treenode.h b/src/trees/treenode.h
index 7acb310..2c559cf 100644
--- a/src/trees/treenode.h
+++ b/src/trees/treenode.h
@@ -1,6 +1,7 @@
 #pragma once
 #include
 #include
+#include
 #include
 #include
 #include
@@ -8,11 +9,10 @@
 #include
 #include
 #include
+#include
 #include "../dataset.h"
-#include "split_info.h"
-#include "../utils.h"
-#include "numerical_splliter.h"
+#include "class_list.h"
 
 namespace microgbt {
 
     /**
      */
     class TreeNode {
     private:
-        int _maxDepth;
-        double _lambda, _minSplitGain, _minTreeSize;
-        bool isLeaf = false;
-        std::unique_ptr<TreeNode> leftSubTree;
-        std::unique_ptr<TreeNode> rightSubTree;
-        long splitFeatureIndex;
+        NodeId _nodeId;
+        double _lambda;
+        std::shared_ptr<TreeNode> leftSubTree;
+        std::shared_ptr<TreeNode> rightSubTree;
+
+        // Feature id on which the best split happened on the current node
+        long _bestSplitFeatureId;
 
         /**
          * Numeric value on which a binary tree split took place
          */
-        double splitNumericValue;
-        double weight = 0.0;
+        double _bestSplitNumericValue, _weight = 0;
+
+        // Total gradient and Hessian sums at the current node
+        double _gradientSum, _hessianSum;
+
+        double _bestGain, _leftGradientSum, _leftHessianSum;
+
+        // Is TreeNode a leaf?
+        bool _isLeaf = true;
 
-        template<typename T, typename... Args>
-        std::unique_ptr<T> make_unique(Args&&... args)
-        {
-            return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-        }
+        // Number of samples assigned to the current node
+        long _size;
+
+        // Set of sample indices that correspond to the left subtree
+        std::set<long> _leftSampleIds;
+
+        explicit TreeNode(){
+            _nodeId = 0;
+            _lambda = 0;
+            _bestGain = std::numeric_limits<double>::min();
+            _bestSplitNumericValue = std::numeric_limits<double>::min();
+            _bestSplitFeatureId = -1;
+            _gradientSum = 0.0;
+            _hessianSum = 0.0;
+            _leftGradientSum = 0.0;
+            _leftHessianSum = 0.0;
+            _weight = 0.0;
+            _isLeaf = true;
+            leftSubTree = nullptr;
+            rightSubTree = nullptr;
+        }
 
     public:
 
-        TreeNode(double lambda, double minSplitGain, double minTreeSize, int maxDepth){
+        TreeNode(long nodeId, double lambda, size_t size){
+            _nodeId = nodeId;
+            _lambda = lambda;
+            _bestGain = std::numeric_limits<double>::lowest();
+            _bestSplitNumericValue = std::numeric_limits<double>::lowest();
+            _bestSplitFeatureId = -1;
+            _gradientSum = 0.0;
+            _hessianSum = 0.0;
+            _leftGradientSum = 0.0;
+            _leftHessianSum = 0.0;
+            _weight = 0.0;
+            _size = size;
+            _isLeaf = true;
+            leftSubTree = nullptr;
+            rightSubTree = nullptr;
+        }
+
+        TreeNode(const TreeNode &treeNode) {
+            _nodeId = treeNode._nodeId;
+            _lambda = treeNode._lambda;
+            _bestGain = treeNode._bestGain;
+            _bestSplitNumericValue = treeNode._bestSplitNumericValue;
+            _bestSplitFeatureId = treeNode._bestSplitFeatureId;
+            _gradientSum = treeNode._gradientSum;
+            _hessianSum = treeNode._hessianSum;
+            _leftGradientSum = treeNode._leftGradientSum;
+            _leftHessianSum = treeNode._leftHessianSum;
+            leftSubTree = treeNode.leftSubTree;
+            rightSubTree = treeNode.rightSubTree;
+            _isLeaf = treeNode._isLeaf;
+            _size = treeNode._size;
+            _leftSampleIds = treeNode._leftSampleIds;
+            _weight = treeNode._weight;
+        }
+
+        void markInnerNode() {
+            _isLeaf = false;
+        }
+
+        void makeLeaf() {
+            _isLeaf = true;
+        }
+
+        inline bool isLeaf() const{
+            return _isLeaf;
+        }
+
+        inline double bestGain() const {
+            return _bestGain;
+        }
+
+        void setBestGain(double gain) {
+            _bestGain = gain;
+        }
+
+        void setLeftSampleIds(const std::set<long>& leftSampleIds){
+            _leftSampleIds = leftSampleIds;
+        }
+
+        bool isLeftAssigned(long sampleId) {
+            return std::find(_leftSampleIds.begin(), _leftSampleIds.end(), sampleId) != _leftSampleIds.end();
+        }
+
+        void setLeftGradientSum(double value) {
+            _leftGradientSum = value;
+        }
+
+        void updateWeight(double shrinkage) {
+            _weight = calc_leaf_weight() * shrinkage;
+        }
+
+        void setLeftHessianSum(double value) {
+            _leftHessianSum = value;
+        }
+
+        void setBestSplitFeatureId(long id) {
+            _bestSplitFeatureId = id;
+        }
+
+        void setWeight(double weight) {
+            _weight = weight;
+        }
+
+        long getSize() const {
+            return _size;
+        }
+
+        void zero() {
+            _leftSampleIds.clear();
+        }
+
+        void setBestSplitValue(double bestSplitValue) {
+            _bestSplitNumericValue = bestSplitValue;
+        }
+
+        void setLambda(double lambda) {
             _lambda = lambda;
-            _minSplitGain = minSplitGain;
-            _maxDepth = maxDepth;
-            _minTreeSize = minTreeSize;
         }
+
+        void setGradientSum(double gradientSum) {
+            _gradientSum = gradientSum;
+        }
+
+        void setHessianSum(double hessianSum) {
+            _hessianSum = hessianSum;
+        }
+
+        inline double getRightGradientSum() const {
+            return _gradientSum - _leftGradientSum;
+        }
+
+        inline double getRightHessianSum() const {
+            return std::max(_hessianSum - _leftHessianSum, 0.0);
+        }
+
+        inline double getGradientSum() const {
+            return _gradientSum;
+        }
+
+        inline double getHessianSum() const {
+            return _hessianSum;
+        }
+
+        inline NodeId getLeftSubTreeId() const {
+            if (leftSubTree == nullptr) {
+                return -1;
+            }
+            return leftSubTree->getNodeId();
+        }
+
+        inline NodeId getRightSubTreeId() const {
+            if (rightSubTree == nullptr) {
+                return -1;
+            }
+            return rightSubTree->getNodeId();
+        }
+
+        inline NodeId getNodeId() const {
+            return _nodeId;
+        }
+
+        inline long getLeftSize() const {
+            return _leftSampleIds.size();
+        }
+
+        inline long getRightSize() const {
+            return _size - getLeftSize();
+        }
+
+        void setLeftSubTree(std::shared_ptr<TreeNode> treeNodePtr, double shrinkage) {
+            leftSubTree = treeNodePtr;
+            leftSubTree->setLambda(_lambda);
+            leftSubTree->setGradientSum(_leftGradientSum);
+            assert(_leftHessianSum > 0);
+            leftSubTree->setHessianSum(_leftHessianSum);
+            leftSubTree->_leftGradientSum = 0.0;
+            leftSubTree->_leftHessianSum = 0.0;
+            leftSubTree->makeLeaf();
+            leftSubTree->updateWeight(shrinkage);
+        }
+
+        void setRightSubTree(std::shared_ptr<TreeNode> treeNodePtr, double shrinkage) {
+            rightSubTree = treeNodePtr;
+            rightSubTree->setLambda(_lambda);
+            rightSubTree->setGradientSum(getRightGradientSum());
+            assert( getRightHessianSum() >= 0);
+            rightSubTree->setHessianSum(getRightHessianSum());
+            rightSubTree->_leftGradientSum = 0.0;
+            rightSubTree->_leftHessianSum = 0.0;
+            rightSubTree->makeLeaf();
+            rightSubTree->updateWeight(shrinkage);
+        }
 
         /**
-         * @param gradient
-         * @param hessian
+         * Returns the optimal weight of this leaf node, -G / (H + lambda)
+         * (Refer to Eq5 of Reference [1])
+         *
          * @return
          */
-        inline double calc_leaf_weight(const Vector &gradient,
-                                       const Vector &hessian) const {
-            return - par_simd_accumulate(gradient)
-                   / (par_simd_accumulate(hessian) + _lambda);
+        inline double calc_leaf_weight() const {
+            return - _gradientSum / (_hessianSum + _lambda);
         }
 
-        /**
-         * Recursively (and greedily) split a TreeNode based on
-         *
-         * Exact Greedy Algorithm for Split Finding:
-         * 1) For each tree node, enumerate over all features:
-         * 2) For each feature, sorted the instances by feature numeric value
-         * 3) Use a linear scan to decide the best split along that feature (if categorical perform Mean Target Encoding)
-         * 4) Take the best split solution (that maximises gain reduction) over all features
-         * 5) Recurse on the left and right side of the best split
-         *
-         * (Refer to Algorithm1 of Reference[1])
-         *
-         * @param trainSet
-         * @param previousPreds
-         * @param gradient
-         * @param hessian
-         * @param shrinkage
-         * @param depth
-         */
-        void build(const Dataset &trainSet,
-                   const Vector &previousPreds,
-                   const Vector &gradient,
-                   const Vector &hessian,
-                   double shrinkage,
-                   int depth) {
-
-            // Check if depth is reached
-            if (depth > _maxDepth) {
-                this->isLeaf = true;
-                this->weight = this->calc_leaf_weight(gradient, hessian) * shrinkage;
-                return;
-            }
-
-            // Check if # of sample is too small
-            if ( trainSet.nRows() <= _minTreeSize) {
-                this->isLeaf = true;
-                this->weight = this->calc_leaf_weight(gradient, hessian) * shrinkage;
-                return;
-            }
-
-            // Initialize a numeric splitter and find best split
-            std::unique_ptr splitter = make_unique(_lambda);
-            SplitInfo bestGain = splitter->findBestSplit(trainSet, gradient, hessian);
-
-            // Check if best gain is less than minimum split gain (threshold)
-            if (bestGain.bestGain() < this->_minSplitGain) {
-                this->isLeaf = true;
-                this->weight = this->calc_leaf_weight(gradient, hessian) * shrinkage;
-                return;
-            }
-
-            this->splitFeatureIndex = bestGain.getBestFeatureId();
-            this->splitNumericValue = bestGain.splitValue();
-
-
-            Dataset leftDataset(trainSet, bestGain, SplitInfo::Side::Left);
-            Vector leftGradient = bestGain.split(gradient, SplitInfo::Side::Left);
-            Vector leftHessian = bestGain.split(hessian, SplitInfo::Side::Left);
-            Vector leftPreviousPreds = bestGain.split(previousPreds, SplitInfo::Side::Left);
-            this->leftSubTree = std::unique_ptr<TreeNode>(
-                    new TreeNode(_lambda, _minSplitGain, _minTreeSize, _maxDepth));
-            leftSubTree->build(leftDataset, leftPreviousPreds, leftGradient, leftHessian, shrinkage, depth + 1);
-
-            Dataset rightDataset(trainSet, bestGain, SplitInfo::Side::Right);
-            Vector rightGradient = bestGain.split(gradient, SplitInfo::Side::Right);
-            Vector rightHessian = bestGain.split(hessian, SplitInfo::Side::Right);
-            Vector rightPreviousPreds = bestGain.split(previousPreds, SplitInfo::Side::Right);
-            this->rightSubTree = std::unique_ptr<TreeNode>(
-                    new TreeNode(_lambda, _minSplitGain, _minTreeSize, _maxDepth));
-            rightSubTree->build(rightDataset, rightPreviousPreds, rightGradient, rightHessian, shrinkage, depth + 1);
-        }
+        /**
+         * Returns the objective value for a given gradient and Hessian,
+         * using the node's lambda regularizer (gradient boosting parameters)
+         *
+         * @param gradient
+         * @param hessian
+         * @return
+         */
+        constexpr double objective(double gradient, double hessian) const {
+            return (gradient * gradient) / (hessian + _lambda);
+        }
+
+        /**
+         * Returns the gain of a specific binary tree split; the right-side and
+         * pre-split sums are derived from the node totals (_gradientSum, _hessianSum).
+         *
+         * Refer to Eq7 of Reference [1]
+         *
+         * @param G_l Gradient sum on the left split node
+         * @param H_l Hessian sum on the left split node
+         * @return
+         */
+        constexpr double calc_split_gain(double G_l, double H_l) const {
+            return (objective(G_l, H_l) + objective(_gradientSum - G_l, _hessianSum - H_l)
+                    - objective(_gradientSum, _hessianSum)) / 2.0; // TODO: minus \gamma
+        }
 
         /**
          * Return the score for a given sample, i.e. set of features
          *
          * @param sample
          * @return
          */
         double score(const Eigen::RowVectorXd &sample) const {
-            if (this->isLeaf) {
-                return this->weight;
-            } else if (sample[this->splitFeatureIndex] < this->splitNumericValue) {
-                return this->leftSubTree->score(sample);
+            if (_isLeaf) {
+                return _weight;
+            } else if (sample[_bestSplitFeatureId] < _bestSplitNumericValue) {
+                return leftSubTree->score(sample);
             } else {
-                return this->rightSubTree->score(sample);
+                return rightSubTree->score(sample);
             }
         }
+
+        /**
+         * Returns a list of strings containing all subtree information in DOT (a graph description language)
+         *
+         * See: https://en.wikipedia.org/wiki/DOT_(graph_description_language)
+         * @return
+         */
+        std::vector<std::string> toDigraph() const {
+            std::vector<std::string> output;
+            if (isLeaf()) {
+                return output;
+            }
+
+            NodeId id = this->getNodeId();
+            NodeId left = this->getLeftSubTreeId();
+            output.push_back(std::to_string(id) + " [label=\"" + print() + "\"]");
+            output.push_back(std::to_string(id) + " -> " + std::to_string(left));
+
+            NodeId right = this->getRightSubTreeId();
+            output.push_back(std::to_string(id) + " -> " + std::to_string(right));
+
+            auto leftCollection = leftSubTree->toDigraph();
+            auto rightCollection = rightSubTree->toDigraph();
+            std::copy(begin(leftCollection), end(leftCollection), std::back_inserter(output));
+            std::copy(begin(rightCollection), end(rightCollection), std::back_inserter(output));
+
+            return output;
+        }
+
+        std::string print() const {
+            std::stringstream ss;
+            ss << " | Node id: " << getNodeId();
+            ss << " | Best Gain: " << bestGain();
+            ss << " | Weight: " << calc_leaf_weight();
+            ss << " | Left size: " << getLeftSize();
+            ss << " | Size: " << getSize();
+            ss << " | G sum: " << getGradientSum();
+            ss << " | H sum: " << getHessianSum();
+            return ss.str();
+        }
     };
 } // namespace microgbt
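
A worked check of calc_split_gain above (Eq 7 of the xgboost paper, without the gamma complexity term flagged in the TODO), using made-up node statistics:

    #include <iostream>

    int main() {
        double lambda = 1.0;
        double Gl = -2.0, Hl = 3.0, Gr = 4.0, Hr = 5.0;
        double G = Gl + Gr, H = Hl + Hr;   // pre-split totals

        auto objective = [lambda](double g, double h) { return g * g / (h + lambda); };

        double gain = (objective(Gl, Hl) + objective(Gr, Hr) - objective(G, H)) / 2.0;
        std::cout << "gain = " << gain << std::endl;  // (1 + 16/6 - 4/9) / 2 = 1.6111...
        return 0;
    }
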
diff --git a/src/utils.h b/src/utils.h
index d3b1aca..0ece31b 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -6,14 +6,36 @@ namespace microgbt {
 
     using Vector = std::vector<double>;
 
-    static double par_simd_accumulate(const Vector& vector) {
+    static double sum(const Vector& vector) {
         size_t n = vector.size();
         double accumulate = 0.0;
-        #pragma omp simd reduction(+: accumulate)
         for (size_t i = 0; i < n; i ++){
-           accumulate += vector[i];
+            accumulate += vector[i];
         }
         return accumulate;
     }
-} // namespace microgbt
+
+    class Permutation {
+
+        std::vector<size_t> _perm, _inverse;
+
+    public:
+
+        explicit Permutation() = default;
+
+        explicit Permutation(const std::vector<size_t> &permVector):
+            _perm(permVector),
+            _inverse(_perm.size()){
+            for (size_t i = 0 ; i < _perm.size(); i++){
+                _inverse[_perm[i]] = i;
+            }
+        }
+
+        constexpr size_t operator()(size_t index) const{
+            return _perm[index];
+        }
+
+        constexpr size_t inverse(size_t index) const{
+            return _inverse[index];
+        }
+    };
+} // namespace microgbt
\ No newline at end of file
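
The inverse array above satisfies _inverse[_perm[i]] = i: operator() answers "which sample is i-th in sorted order", while inverse() answers "what rank does sample s have". A tiny property check (assumes src/ is on the include path):

    #include <cassert>
    #include <cstddef>
    #include <vector>
    #include "utils.h"

    void permutationSketch() {
        std::vector<size_t> order{1, 2, 0};
        microgbt::Permutation perm(order);
        for (size_t i = 0; i < order.size(); i++) {
            assert(perm.inverse(perm(i)) == i);  // inverse undoes the permutation
        }
    }
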
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index dc452bb..5743df8 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,12 +1,12 @@
 add_executable(
         unit_tests
         main.cpp
-        test_split_info.cpp
         test_metric_rmse.cpp
         test_dataset.cpp
         test_treenode.cpp
         test_tree.cpp
         test_gbt.cpp
+        test_utils.cpp
         ../src/metrics/metric.h ../src/trees/tree.h ../src/GBT.h
         ../src/dataset.h ../src/metrics/rmse.h)
 
 target_link_libraries(
diff --git a/test/test_dataset.cpp b/test/test_dataset.cpp
index c4858de..def9257 100644
--- a/test/test_dataset.cpp
+++ b/test/test_dataset.cpp
@@ -12,22 +12,3 @@ TEST(Dataset, DefaultConstructor)
     ASSERT_EQ(dataset.nRows(), m);
     ASSERT_EQ(dataset.numFeatures(), n);
 }
-
-TEST(Dataset, Constructor)
-{
-
-    size_t m = 2, n = 3;
-    Eigen::MatrixXd A = Eigen::MatrixXd::Zero(m, n);
-    microgbt::Vector y = {1.0, 2.0};
-    microgbt::Dataset dataset(A, y);
-
-    std::vector<size_t> left = {0};
-    std::vector<size_t> right = {1, 2};
-    microgbt::SplitInfo splitInfo(dataset.sortedColumnIndices(0), 0.0, 1.0, 1);
-
-    microgbt::Dataset leftDS(dataset, splitInfo, microgbt::SplitInfo::Left);
-
-    ASSERT_EQ(leftDS.nRows(), left.size());
-    ASSERT_EQ(leftDS.numFeatures(), n);
-
-}
diff --git a/test/test_split_info.cpp b/test/test_split_info.cpp
deleted file mode 100644
index dbc1538..0000000
--- a/test/test_split_info.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "gtest/gtest.h"
-#include "trees/split_info.h"
-
-TEST(microgbt, SplitInfo)
-{
-    microgbt::SplitInfo gain(0.0, 1.0);
-    ASSERT_NEAR(gain.bestGain(), 0.0, 1.0e-11);
-    ASSERT_NEAR(gain.splitValue(), 1.0, 1.0e-11);
-}
diff --git a/test/test_treenode.cpp b/test/test_treenode.cpp
index d202804..9bc86f1 100644
--- a/test/test_treenode.cpp
+++ b/test/test_treenode.cpp
@@ -3,6 +3,6 @@
 
 TEST(microgbt, TreeNode)
 {
-    microgbt::TreeNode treeNode(1.0, 2.0, 2, 10);
+    microgbt::TreeNode treeNode(1, 2.0, 0);
     ASSERT_TRUE(true);
 }
diff --git a/test/test_utils.cpp b/test/test_utils.cpp
new file mode 100644
index 0000000..165acbb
--- /dev/null
+++ b/test/test_utils.cpp
@@ -0,0 +1,17 @@
+#include
+#include "gtest/gtest.h"
+
+TEST(microgbt, Permutation)
+{
+    std::vector<size_t> permVec{1, 2, 0};
+    microgbt::Permutation perm(permVec);
+
+
+    ASSERT_EQ(perm(0), 1);
+    ASSERT_EQ(perm(1), 2);
+    ASSERT_EQ(perm(2), 0);
+
+    ASSERT_EQ(perm.inverse(0), 2);
+    ASSERT_EQ(perm.inverse(1), 0);
+    ASSERT_EQ(perm.inverse(2), 1);
+}
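
Putting the pieces together, a hedged end-to-end sketch of the API this patch leaves behind — toy data, illustrative parameter values, and paths assuming src/ is on the include path with Eigen available:

    #include <vector>
    #include <Eigen/Dense>
    #include "dataset.h"
    #include "trees/tree.h"

    int main() {
        Eigen::MatrixXd X(4, 1);
        X << 1.0, 2.0, 3.0, 4.0;
        std::vector<double> y{0.0, 0.0, 1.0, 1.0};
        microgbt::Dataset dataset(X, y);

        // Gradient/Hessian vectors as a Metric would produce them at one boosting round
        std::vector<double> gradient{0.5, 0.5, -0.5, -0.5};
        std::vector<double> hessian{0.25, 0.25, 0.25, 0.25};

        microgbt::Tree tree(1.0 /* lambda */, 0.0 /* minSplitGain */,
                            1 /* minTreeSize */, 3 /* maxDepth */);
        tree.build(dataset, gradient, hessian, 1.0 /* shrinkage */);

        double s = tree.score(X.row(0));  // score one sample through the fitted tree
        (void)s;
        return 0;
    }
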