-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtest_datasets.py
56 lines (43 loc) · 1.91 KB
/
test_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import sys
import pytest
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
from sklearn.dummy import DummyClassifier
from sslearn.base import get_dataset
from sslearn.datasets import read_csv, read_keel, save_keel, secure_dataset
from sslearn.wrapper import SelfTraining
def folder():
    """Return the path of the ``example_files`` directory beside this module.

    The path of this file is passed through ``os.path.realpath`` before its
    directory is taken, so the result is anchored to the resolved file
    location.
    """
    this_file = os.path.realpath(__file__)
    module_dir = os.path.dirname(this_file)
    return os.path.join(module_dir, "example_files")
def posterior(X, y):
    """Smoke-check that a loaded dataset can be used to train classifiers.

    Splits ``(X, y)`` with ``get_dataset``, asserts that the unlabeled part
    is not empty, then fits a most-frequent ``DummyClassifier`` on the
    labeled part and a ``SelfTraining`` wrapper around another
    ``DummyClassifier`` on the full ``(X, y)``.

    Nothing is returned; the check passes when no call raises.
    """
    labeled_X, labeled_y, unlabeled_X = get_dataset(X, y)
    assert unlabeled_X.shape[0] != 0

    # Plain classifier trained on the labeled split only.
    baseline = DummyClassifier(strategy="most_frequent")
    baseline.fit(labeled_X, labeled_y)

    # Self-training wrapper trained on the complete data as it was loaded.
    self_trainer = SelfTraining(DummyClassifier(strategy="most_frequent"))
    self_trainer.fit(X, y)
class TestDataset:
    """Tests for the dataset helpers imported from ``sslearn.datasets``."""

    def test_read_csv(self):
        """``read_csv`` output passes the training smoke-check in both formats."""
        X, y = read_csv(os.path.join(folder(), "abalone.csv"), format="pandas")
        posterior(X, y)

        X, y = read_csv(os.path.join(folder(), "abalone.csv"), format="numpy")
        posterior(X, y)

    def test_read_keel(self):
        """``read_keel`` output passes the training smoke-check in both formats."""
        X, y = read_keel(os.path.join(folder(), "abalone.dat"), format="pandas")
        posterior(X, y)

        X, y = read_keel(os.path.join(folder(), "abalone.dat"), format="numpy")
        posterior(X, y)

    def test_secure_dataset(self):
        """``secure_dataset`` keeps the labeled split and rejects the raw data."""
        X, y = read_csv(os.path.join(folder(), "abalone.csv"), format="pandas")
        X_label, y_label, _ = get_dataset(X, y)

        X1, y1 = secure_dataset(X_label, y_label)

        # The full dataset as loaded is expected to be refused with ValueError.
        with pytest.raises(ValueError):
            secure_dataset(X, y)

        assert (X1.values == X_label.values).all()
        assert (y1 == y_label).all()

    def test_save_keel(self):
        """A dataset written by ``save_keel`` reads back unchanged."""
        X, y = read_keel(os.path.join(folder(), "abalone.dat"), format="pandas")
        temp_path = os.path.join(folder(), "temp_abalone.dat")

        try:
            save_keel(X, y, temp_path, name="abalone")
            X1, y1 = read_keel(temp_path, format="pandas")
        finally:
            # Never leave the temporary file in the fixtures directory, even
            # when writing or re-reading it fails.
            if os.path.exists(temp_path):
                os.remove(temp_path)

        assert (X.columns == X1.columns).all()
        assert (y == y1).all()
        assert (X == X1).all().all()