Skip to content

Commit

Permalink
Merge pull request #249 from stuart-cls/fix-average-group-metas
Browse files Browse the repository at this point in the history
OWAverage: Fix average group handling of metas and unknowns
  • Loading branch information
markotoplak authored Nov 23, 2018
2 parents 9543da6 + 18491dc commit ec42203
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 6 deletions.
66 changes: 64 additions & 2 deletions orangecontrib/spectroscopy/tests/test_owaverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,28 @@ def test_nan_propagation(self):
self.assertFalse(np.any(np.isnan(out.X[:, 2:])))

def test_average_by_group(self):
    """Grouping by the class variable gives one averaged row per class value."""
    data = self.collagen
    self.send_signal("Data", data)
    group = data.domain.class_var
    self.widget.group_var = group
    self.widget.grouping_changed()
    averages = self.get_output("Averages")
    n_rows, n_cols = averages.X.shape
    self.assertEqual(n_rows, len(group.values))
    self.assertEqual(n_cols, self.collagen.X.shape[1])
    # First 195 rows are labelled "collagen"
    expected = np.mean(self.collagen.X[:195], axis=0)
    np.testing.assert_equal(averages.X[1], expected)

def test_average_by_group_metas(self):
# Alter collagen domain to have Continuous/String/TimeVariables in metas
c_domain = self.collagen.domain
str_var = Orange.data.StringVariable.make(name="stringtest")
time_var = Orange.data.TimeVariable.make(name="timetest")
n_domain = Orange.data.Domain(c_domain.attributes,
c_domain.class_vars,
[c_domain.attributes[0]])
[c_domain.attributes[0], str_var, time_var])
collagen = self.collagen.transform(n_domain)
collagen.metas[:, 0] = np.atleast_2d(collagen.X[:, 0])
collagen.metas[:, 1] = ["string"] * len(collagen)
collagen.metas[:, 2] = [1560.3] * len(collagen)

self.send_signal("Data", collagen)
gvar = self.widget.group_var = collagen.domain.class_var
Expand All @@ -70,3 +85,50 @@ def test_average_by_group(self):
np.testing.assert_allclose(out[0, 0], out[0, -1])
# Other variables keep first if all the same
self.assertEqual(collagen[0, gvar], out[1, gvar])
self.assertEqual(collagen[0, str_var], out[1, str_var])
np.testing.assert_allclose(collagen[0, time_var], out[1, time_var])

def test_average_by_group_unknown(self):
    """Rows whose group value is unknown are averaged into one extra output row."""
    data = self.collagen.copy()
    group = data.domain.class_var
    unknown_rows = [3, 15, 100, 500, 650]
    # Blank out the "type" value on a handful of rows
    data[unknown_rows, group] = Orange.data.Unknown

    self.send_signal("Data", data)
    self.widget.group_var = group
    self.widget.grouping_changed()
    averages = self.get_output("Averages")

    # One row per class value plus one for the unknowns
    self.assertEqual(averages.X.shape[0], len(group.values) + 1)
    expected = np.mean(data.X[unknown_rows], axis=0)
    np.testing.assert_equal(averages.X[4], expected)

def test_average_by_group_missing(self):
    """A "type" value that has no member rows contributes no averaged row."""
    group = self.collagen.domain.class_var
    # Keep only the rows that do NOT carry the first "type" value
    drop_first_value = Orange.data.filter.SameValue(
        group, group.values[0], negate=True
    )
    data = drop_first_value(self.collagen)

    self.send_signal("Data", data)
    self.widget.group_var = group
    self.widget.grouping_changed()
    averages = self.get_output("Averages")

    self.assertEqual(averages.X.shape[0], len(group.values) - 1)

def test_average_by_group_objectvar(self):
    """Grouping still works when group_var lives in metas (object array)."""
    source_domain = self.collagen.domain
    group = source_domain.class_var
    string_meta = Orange.data.StringVariable.make(name="stringtest")
    # No class variable; the former class var sits in metas beside a string meta
    metas_domain = Orange.data.Domain(
        source_domain.attributes,
        None,
        [source_domain.class_var, string_meta],
    )
    data = self.collagen.transform(metas_domain)

    self.send_signal("Data", data)
    self.widget.group_var = group
    self.widget.grouping_changed()
    averages = self.get_output("Averages")

    # First 195 rows are labelled "collagen"
    expected = np.mean(self.collagen.X[:195], axis=0)
    np.testing.assert_equal(averages.X[1], expected)
25 changes: 21 additions & 4 deletions orangecontrib/spectroscopy/widgets/owaverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np

import Orange.data
from Orange.data.filter import SameValue
from Orange.data.filter import SameValue, FilterDiscrete, Values
from Orange.widgets.widget import OWWidget, Msg, Input, Output
from Orange.widgets import gui, settings
from Orange.widgets.utils.itemmodels import DomainModel
Expand Down Expand Up @@ -77,17 +77,27 @@ def average_table(table):
if all are the same.
- return unknown otherwise.
"""
if len(table) == 0:
return table
mean = np.nanmean(table.X, axis=0, keepdims=True)
avg_table = Orange.data.Table.from_numpy(table.domain,
X=mean,
Y=np.atleast_2d(table.Y[0]),
metas=np.atleast_2d(table.metas[0]))
Y=np.atleast_2d(table.Y[0].copy()),
metas=np.atleast_2d(table.metas[0].copy()))
cont_vars = [var for var in table.domain.class_vars + table.domain.metas
if isinstance(var, Orange.data.ContinuousVariable)]
for var in cont_vars:
index = table.domain.index(var)
col, _ = table.get_column_view(index)
avg_table[0, index] = np.nanmean(col)
try:
avg_table[0, index] = np.nanmean(col)
except AttributeError:
# numpy.lib.nanfunctions._replace_nan just guesses and returns
# a boolean array mask for object arrays because object arrays
# do not support `isnan` (numpy-gh-9009)
# Since we know that ContinuousVariable values must be np.float64
# do an explicit cast here
avg_table[0, index] = np.nanmean(col, dtype=np.float64)

other_vars = [var for var in table.domain.class_vars + table.domain.metas
if not isinstance(var, Orange.data.ContinuousVariable)]
Expand Down Expand Up @@ -115,6 +125,13 @@ def commit(self):
svfilter = SameValue(self.group_var, value)
v_table = self.average_table(svfilter(self.data))
averages.extend(v_table)
# Using "None" as in OWSelectRows
# Values is required because FilterDiscrete doesn't have
# negate keyword or IsDefined method
deffilter = Values(conditions=[FilterDiscrete(self.group_var, None)],
negate=True)
v_table = self.average_table(deffilter(self.data))
averages.extend(v_table)
self.Outputs.averages.send(averages)


Expand Down

0 comments on commit ec42203

Please sign in to comment.