Skip to content

Commit 9358116

Browse files
authored
Merge pull request #5 from DEIS-Tools/hidden_actions
Fixing issues with hidden actions. Also improves on variance computation.
2 parents 65df567 + c5861ed commit 9358116

11 files changed

+150
-117
lines changed

src/CMakeLists.txt

+1 -1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ project(PRLearn C CXX)
33
set(CMAKE_CXX_STANDARD 17)
44
set(CMAKE_INCLUDE_CURRENT_DIR ON)
55

6-
find_package(Boost 1.54 REQUIRED COMPONENTS headers REQUIRED)
6+
find_package(Boost 1.54 REQUIRED)
77

88
add_library(prlearn SHARED ${HEADER_FILES} MLearning.cpp SimpleMLearning.cpp RefinementTree.cpp structs.cpp)
99
add_library(prlearnStatic STATIC ${HEADER_FILES} MLearning.cpp SimpleMLearning.cpp RefinementTree.cpp structs.cpp)

src/MLearning.cpp

+23 -20
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,24 @@
11
/*
22
* Copyright Peter G. Jensen
3-
*
3+
*
44
* This program is free software: you can redistribute it and/or modify
55
* it under the terms of the GNU Lesser General Public License as published by
66
* the Free Software Foundation, either version 3 of the License, or
77
* (at your option) any later version.
8-
*
8+
*
99
* This program is distributed in the hope that it will be useful,
1010
* but WITHOUT ANY WARRANTY; without even the implied warranty of
1111
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1212
* GNU General Public License for more details.
13-
*
13+
*
1414
* You should have received a copy of the GNU Lesser General Public License
1515
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1616
*/
1717

18-
/*
18+
/*
1919
* File: MLearning.cpp
2020
* Author: Peter G. Jensen
21-
*
21+
*
2222
* Created on July 25, 2017, 9:58 AM
2323
*/
2424

@@ -104,7 +104,8 @@ namespace prlearn {
104104
}
105105

106106
void MLearning::addSample(size_t dimen, const double* f_var,
107-
const double* t_var, size_t label,
107+
const double* t_var, size_t*, size_t,
108+
size_t label,
108109
size_t dest, double value, const std::vector<MLearning>& clouds,
109110
bool minimization, const double delta,
110111
const propts_t& options) {
@@ -225,7 +226,7 @@ namespace prlearn {
225226
}
226227
}
227228

228-
void MLearning::update(const std::vector<MLearning>&, bool)
229+
void MLearning::update(const std::vector<MLearning>&, bool)
229230
{
230231
}
231232

@@ -254,10 +255,10 @@ namespace prlearn {
254255
auto c = clouds[s._cloud]._nodes[s._nodes[i]]._q.avg();
255256
fut = std::min(fut, c);
256257
if (c == best)
257-
var = std::min(var, clouds[s._cloud]._nodes[s._nodes[i]]._q._variance);
258+
var = std::min(var, clouds[s._cloud]._nodes[s._nodes[i]]._q.variance());
258259
else if ((c < best && minimize) || (c > best && !minimize)) {
259260
best = c;
260-
var = clouds[s._cloud]._nodes[s._nodes[i]]._q._variance;
261+
var = clouds[s._cloud]._nodes[s._nodes[i]]._q.variance();
261262
}
262263
}
263264
}
@@ -274,8 +275,8 @@ namespace prlearn {
274275
auto v = s._variance[d];
275276
v.first.avg() += best;
276277
v.second.avg() += best;
277-
v.first._variance = std::max(v.first._variance, var);
278-
v.second._variance = std::max(v.second._variance, var);
278+
v.first.set_variance(std::max(v.first.variance(), var));
279+
v.second.set_variance(std::max(v.second.variance(), var));
279280
tmpq[d].first.addPoints(v.first.cnt(), v.first.avg());
280281
tmpq[d].second.addPoints(v.second.cnt(), v.second.avg());
281282
mean.addPoints(v.first.cnt(), v.first.avg());
@@ -287,8 +288,8 @@ namespace prlearn {
287288
auto v = s._old[d];
288289
v.first.avg() += best;
289290
v.second.avg() += best;
290-
v.first._variance = std::max(v.first._variance, var);
291-
v.second._variance = std::max(v.second._variance, var);
291+
v.first.set_variance(std::max(v.first.variance(), var));
292+
v.second.set_variance(std::max(v.second.variance(), var));
292293
old_mean.addPoints(v.first.cnt(), v.first.avg());
293294
old_mean.addPoints(v.second.cnt(), v.second.avg());
294295
old_var.push_back(v.first);
@@ -304,7 +305,7 @@ namespace prlearn {
304305
for (auto& s : sample_qvar) {
305306
{
306307
const auto dif = std::abs(s.avg() - mean._avg);
307-
const auto std = std::sqrt(s._variance);
308+
const auto std = std::sqrt(s.variance());
308309
auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0;
309310
svar.addPoints(s.cnt(), var);
310311
}
@@ -316,7 +317,7 @@ namespace prlearn {
316317
}
317318
{
318319
const auto dif = std::abs(s.avg() - dmin);
319-
const auto std = std::sqrt(s._variance);
320+
const auto std = std::sqrt(s.variance());
320321
auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0;
321322
vars[id].addPoints(s.cnt(), var);
322323
}
@@ -327,18 +328,20 @@ namespace prlearn {
327328

328329
for (auto& s : old_var) {
329330
const auto dif = std::abs(s.avg() - old_mean._avg);
330-
const auto std = std::sqrt(s._variance);
331+
const auto std = std::sqrt(s.variance());
331332
auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0;
332333
ovar.addPoints(s.cnt(), var);
333334
}
334335

335336
for (size_t i = 0; i < dimen; ++i) {
336-
tmpq[i].first._variance = vars[i]._avg;
337-
tmpq[i].second._variance = vars[i + dimen]._avg;
337+
tmpq[i].first.set_variance(vars[i]._avg);
338+
tmpq[i].second.set_variance(vars[i + dimen]._avg);
338339
}
339340

340-
qvar_t nq(mean._avg, mean._cnt / (dimen * 2), svar._avg);
341-
qvar_t oq(old_mean._avg, old_mean._cnt / (dimen * 2), ovar._avg);
341+
qvar_t nq(mean._avg, mean._cnt / (dimen * 2), 0);
342+
nq.set_variance(svar._avg);
343+
qvar_t oq(old_mean._avg, old_mean._cnt / (dimen * 2), 0);
344+
oq.set_variance(ovar._avg);
342345
return std::make_pair(nq, oq);
343346
}
344347

src/MLearning.h

+6 -5
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,22 @@
11
/*
22
* Copyright Peter G. Jensen
3-
*
3+
*
44
* This program is free software: you can redistribute it and/or modify
55
* it under the terms of the GNU Lesser General Public License as published by
66
* the Free Software Foundation, either version 3 of the License, or
77
* (at your option) any later version.
8-
*
8+
*
99
* This program is distributed in the hope that it will be useful,
1010
* but WITHOUT ANY WARRANTY; without even the implied warranty of
1111
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1212
* GNU General Public License for more details.
13-
*
13+
*
1414
* You should have received a copy of the GNU Lesser General Public License
1515
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1616
*/
1717

1818

19-
/*
19+
/*
2020
* File: MLearning.h
2121
* Author: Peter G. Jensen
2222
*
@@ -32,7 +32,7 @@
3232
#include <map>
3333
#include <limits>
3434

35-
namespace prlearn {
35+
namespace prlearn {
3636

3737
class MLearning {
3838
public:
@@ -45,6 +45,7 @@ namespace prlearn {
4545

4646
void addSample(size_t dimen, // dimensions
4747
const double* f_var, const double* t_var, // doubles
48+
size_t* next_labels, size_t n_labels, // actions in dest, ignored in m learning
4849
size_t label, // edge chosen, edge taken
4950
size_t dest, double value, // cost
5051
const std::vector<MLearning>& clouds, // other points

src/QLearning.h

+7 -5
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,21 @@
11
/*
22
* Copyright Peter G. Jensen
3-
*
3+
*
44
* This program is free software: you can redistribute it and/or modify
55
* it under the terms of the GNU Lesser General Public License as published by
66
* the Free Software Foundation, either version 3 of the License, or
77
* (at your option) any later version.
8-
*
8+
*
99
* This program is distributed in the hope that it will be useful,
1010
* but WITHOUT ANY WARRANTY; without even the implied warranty of
1111
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1212
* GNU General Public License for more details.
13-
*
13+
*
1414
* You should have received a copy of the GNU Lesser General Public License
1515
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1616
*/
1717

18-
/*
18+
/*
1919
* File: QLearning.h
2020
* Author: Peter G. Jensen
2121
*
@@ -46,6 +46,7 @@ namespace prlearn {
4646

4747
void addSample(size_t dimen, // dimensions
4848
const double* f_val, const double* t_val, // source, destination-states
49+
size_t* next_labels, size_t n_labels,
4950
size_t label, // action chosen,
5051
size_t dest, double value, // destination, cost
5152
const std::vector<QLearning>& clouds, // other points
@@ -67,14 +68,15 @@ namespace prlearn {
6768
template<typename Regressor>
6869
void QLearning<Regressor>::addSample(size_t dimen, // dimensions
6970
const double* f_var, const double* t_var, // doubles
71+
size_t* next_labels, size_t n_labels, // possible actions
7072
size_t label, size_t dest, double value, // cost
7173
const std::vector<QLearning<Regressor>>&clouds, // other points
7274
bool minimization, const double delta, const propts_t& options) {
7375
// The ALPHA part of Q-learning is handled inside the regressors
7476
auto toDone = 0.0;
7577

7678
if (dest != 0 && options._discount != 0)
77-
toDone = clouds[dest]._regressor.getBestQ(t_var, minimization); // 0 is a special sink-node.
79+
toDone = clouds[dest]._regressor.getBestQ(t_var, minimization, next_labels, n_labels); // 0 is a special sink-node.
7880
auto nval = value;
7981
// if future is not a weird number, then add it (discounted)
8082
if (!std::isinf(toDone) && !std::isnan(toDone)) {

src/RefinementTree.cpp

+36 -18
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,24 @@
11
/*
22
* Copyright Peter G. Jensen
3-
*
3+
*
44
* This program is free software: you can redistribute it and/or modify
55
* it under the terms of the GNU Lesser General Public License as published by
66
* the Free Software Foundation, either version 3 of the License, or
77
* (at your option) any later version.
8-
*
8+
*
99
* This program is distributed in the hope that it will be useful,
1010
* but WITHOUT ANY WARRANTY; without even the implied warranty of
1111
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1212
* GNU General Public License for more details.
13-
*
13+
*
1414
* You should have received a copy of the GNU Lesser General Public License
1515
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1616
*/
1717

18-
/*
18+
/*
1919
* File: RefinementTree.cpp
2020
* Author: Peter G. Jensen
21-
*
21+
*
2222
* Created on July 18, 2017, 5:09 PM
2323
*/
2424

@@ -67,23 +67,41 @@ namespace prlearn {
6767
auto res = std::lower_bound(std::begin(_mapping), std::end(_mapping), lf);
6868
if (res == std::end(_mapping) || res->_label != label)
6969
return qvar_t(std::numeric_limits<double>::quiet_NaN(), 0, 0);
70-
assert(dimen == _dimen);
7170
auto n = _nodes[res->_nid].get_leaf(point, res->_nid, _nodes);
7271
auto& node = _nodes[n];
73-
return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q._variance);
72+
return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q.squared());
7473
}
7574

76-
double RefinementTree::getBestQ(const double* point, bool minimization) const {
75+
double RefinementTree::getBestQ(const double* point, bool minimization, size_t* next_labels, size_t n_labels) const {
7776
auto val = std::numeric_limits<double>::infinity();
7877
if (!minimization)
7978
val = -val;
80-
for (const el_t& el : _mapping) {
81-
auto node = _nodes[el._nid].get_leaf(point, el._nid, _nodes);
82-
auto v = _nodes[node]._predictor._q.avg();
83-
if (!std::isinf(v) && !std::isnan(v))
84-
val = minimization ?
85-
std::min(v, val) :
86-
std::max(v, val);
79+
if(next_labels == nullptr)
80+
{
81+
for (const el_t& el : _mapping) {
82+
auto node = _nodes[el._nid].get_leaf(point, el._nid, _nodes);
83+
auto v = _nodes[node]._predictor._q.avg();
84+
if (!std::isinf(v) && !std::isnan(v))
85+
val = minimization ?
86+
std::min(v, val) :
87+
std::max(v, val);
88+
}
89+
}
90+
else {
91+
for(size_t i = 0; i < n_labels; ++i)
92+
{
93+
size_t j = 0;
94+
for(;j < _mapping.size() && _mapping[j]._label < next_labels[i]; ++j) {};
95+
if(j >= _mapping.size()) continue;
96+
if(_mapping[j]._label != next_labels[i]) continue;
97+
const auto& res = _mapping[j];
98+
auto node = _nodes[res._nid].get_leaf(point, res._nid, _nodes);
99+
auto v = _nodes[node]._predictor._q.avg();
100+
if (!std::isinf(v) && !std::isnan(v))
101+
val = minimization ?
102+
std::min(v, val) :
103+
std::max(v, val);
104+
}
87105
}
88106
return val;
89107
}
@@ -162,7 +180,7 @@ namespace prlearn {
162180
_predictor._data[i]._hmid += point[i];
163181
}
164182

165-
// update the split-filters
183+
// update the split-filters
166184
_predictor._data[i]._splitfilter.add(_predictor._data[i]._lowq,
167185
_predictor._data[i]._highq,
168186
delta * options._indefference,
@@ -213,12 +231,12 @@ namespace prlearn {
213231
if (nodes[slow]._predictor._q.cnt() == 0) {
214232
nodes[slow]._predictor._q.cnt() = 1;
215233
nodes[slow]._predictor._q.avg() = oq.avg();
216-
nodes[slow]._predictor._q._variance = 0;
234+
nodes[slow]._predictor._q.squared() = std::pow(oq.avg(), 2.0);
217235
}
218236
if (nodes[shigh]._predictor._q.cnt() == 0) {
219237
nodes[shigh]._predictor._q.cnt() = 1;
220238
nodes[shigh]._predictor._q.avg() = oq.avg();
221-
nodes[shigh]._predictor._q._variance = 0;
239+
nodes[shigh]._predictor._q.squared() = std::pow(oq.avg(), 2.0);
222240
}
223241
}
224242
nodes[shigh]._predictor._cnt = nodes[shigh]._predictor._q.cnt();

src/RefinementTree.h

+6 -6
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,22 @@
11
/*
22
* Copyright Peter G. Jensen
3-
*
3+
*
44
* This program is free software: you can redistribute it and/or modify
55
* it under the terms of the GNU Lesser General Public License as published by
66
* the Free Software Foundation, either version 3 of the License, or
77
* (at your option) any later version.
8-
*
8+
*
99
* This program is distributed in the hope that it will be useful,
1010
* but WITHOUT ANY WARRANTY; without even the implied warranty of
1111
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1212
* GNU General Public License for more details.
13-
*
13+
*
1414
* You should have received a copy of the GNU Lesser General Public License
1515
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1616
*/
1717

1818

19-
/*
19+
/*
2020
* File: RefinementTree.h
2121
* Author: Peter G. Jensen
2222
*
@@ -48,7 +48,7 @@ namespace prlearn {
4848

4949
void print(std::ostream& s, size_t tabs, std::map<size_t, size_t>& edge_map) const;
5050

51-
double getBestQ(const double* val, bool minimization) const;
51+
double getBestQ(const double* val, bool minimization, size_t* next_labels = nullptr, size_t n_labels = 0) const;
5252

5353
protected:
5454

@@ -78,7 +78,7 @@ namespace prlearn {
7878
};
7979

8080
struct node_t {
81-
// we could do these two values as a single pointer
81+
// we could do these two values as a single pointer
8282
// which dynamically allocates enough space for both split and pred_t
8383
// including space for the run-time sized arrays.
8484
// however, this is at current time of writing a premature optimization.

0 commit comments

Comments
 (0)