forked from staccDOTsol/sisy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentiment_analysis.py
159 lines (150 loc) · 5.34 KB
/
sentiment_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from keras.datasets import imdb
from keras.preprocessing import sequence
from sisy import *

# --- Hyperparameters -------------------------------------------------------
# TODO: make max_features a search parameter accepting a range().
# NOTE(review): the original assigned max_features = 10000 and then
# immediately overwrote it with 20000; the dead first assignment is removed.
max_features = 20000  # vocabulary size: keep only the top-N most frequent words
batch_size = 32  # NOTE(review): unused — the run_sisy_experiment call below passes batch_size=64
max_len = 80  # cut texts after this number of words (among top max_features most common words)

# --- Data ------------------------------------------------------------------
# Load the IMDB review dataset restricted to the `max_features` most common
# words, then pad/truncate every review to a fixed length of `max_len` so the
# sequences can be batched into a fixed-shape tensor.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

# --- Model layout ----------------------------------------------------------
# Fixed Embedding -> LSTM -> sigmoid-output topology for binary sentiment
# classification; the commented lines show optional searchable layers.
layout = [
    ('Input', {'units': max_len}),
    ('Embedding', {'input_dim': max_features, 'output_dim': 128, 'input_length': max_len}),
    ('LSTM', {'units': 128, 'dropout': 0.2, 'recurrent_dropout': 0.2}),
    # ('Dense',   {'units': range(100, 1000)}),
    # ('Dropout', {'rate': list(frange(0.2, 0.8))}),
    # ('Dense',   {'units': range(100, 1000)}),
    ('Output', {'units': 1, 'activation': 'sigmoid'})
]

# --- Search ----------------------------------------------------------------
# Run the sisy genetic search: 5 generations over a population of 5
# candidates, each trained for 3 epochs with binary cross-entropy / Adam.
run_sisy_experiment(layout, "sentiment_analysis", (X_train, y_train), (X_test, y_test),
                    generations=5,
                    batch_size=64,
                    autoloop=True,
                    population_size=5,
                    epochs=3,
                    n_jobs=8,
                    loss='binary_crossentropy',
                    optimizer='adam',
                    shuffle=False)
# import logging
# import os, sys
#
# from examples.ga.dataset import get_reuters_dataset
#
# sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
#
# from keras.models import Sequential
# from keras.layers import Dense, Activation, Dropout
# from keras.datasets import boston_housing, imdb
# import keras.metrics as metrics
# import math
#
# from keras.preprocessing import sequence
# from sklearn.preprocessing import StandardScaler
#
# from minos.experiment.experiment import ExperimentSettings
# from minos.experiment.ga import run_ga_search_experiment
# from minos.experiment.training import Training, EpochStoppingCondition
# from minos.model.model import Objective, Optimizer, Metric
# from minos.model.parameter import int_param, float_param, Parameter
#
# from minos.train.utils import SimpleBatchIterator, CpuEnvironment
# from minos.train.utils import GpuEnvironment
# from minos.utils import load_best_model
#
# batch_size = 32
# max_features = 10000
# output_dim = 128
# maxlen = 80 # cut texts after this number of words (among top max_features most common words)
#
#
# def search_model(experiment_label, steps, batch_size):
#
# (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
# #
# X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
# X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
#
# batch_iterator = SimpleBatchIterator(X_train, y_train, batch_size=batch_size,autorestart=True)
# test_batch_iterator = SimpleBatchIterator(X_test, y_test, batch_size=batch_size,autorestart=True)
#
# from minos.experiment.experiment import Experiment
#
# from minos.model.model import Layout
# from minos.experiment.experiment import ExperimentParameters
# experiment_parameters = ExperimentParameters(use_default_values=True)
# experiment_settings = ExperimentSettings()
#
# training = Training(
# Objective('binary_crossentropy'),
# Optimizer(optimizer='Adam'),
# Metric('accuracy'),
# EpochStoppingCondition(10),
# batch_size)
#
# layout = Layout(
# maxlen,
# 1,
# output_activation='sigmoid',
# block=[
# ('Embedding', { 'input_dim' : max_features , 'output_dim': 128}),
# # ('Embedding', {'input_dim': max_features }),
# # ('LSTM', { 'dropout': 0.2, 'recurrent_dropout': 0.2}),
# ('LSTM', { 'units': 128, 'dropout': 0.2, 'recurrent_dropout': 0.2}),
# ]
#
# )
# in_and_outs = Parameter(
# int,
# lo=100,
# hi=200,
# mutable=False)
#
# # experiment_parameters.layer_parameter('LSTM.units', in_and_outs)
# # experiment_parameters.layer_parameter('Embedding.output_dim', in_and_outs)
#
#
# experiment_parameters.layout_parameter('rows', 1)
# experiment_parameters.layout_parameter('blocks', 1)
# experiment_parameters.layout_parameter('layers', 1)
#
#
# experiment_settings.ga['population_size'] = 5
# experiment_settings.ga['generations'] = steps
# experiment_settings.ga['p_offspring'] = 1
# experiment_settings.ga['p_mutation'] = 1
#
# experiment = Experiment(
# experiment_label,
# layout=layout,
# training=training,
# batch_iterator=batch_iterator,
# test_batch_iterator=test_batch_iterator,
# # environment=GpuEnvironment(devices=['gpu:0'], n_jobs=10),
# environment=CpuEnvironment(),
# parameters=experiment_parameters,
# settings=experiment_settings
# )
#
# run_ga_search_experiment(
# experiment,
# resume=False,
# log_level='DEBUG')
# return load_best_model(experiment_label, steps - 1, X_train, y_train, X_test, y_test)
#
#
# def main():
# label = 'sentiment_analysis'
# steps = 4
#
# # Load the model if it exists, otherwise do a new search
# # try:
# # model = load_best_model(label, steps - 1, X_train, y_train, X_test, y_test, batch_size=1, epochs=10)
# # except Exception:
# model = search_model(label, steps,batch_size)
#
#
#
# main()