-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
73 lines (57 loc) · 2.53 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Author: Marlos C. Machado
import random
import argparse
import numpy as np
import scipy as sp
import scipy.stats
class ArgsParser:
    """
    Command-line argument handling.

    The parsed result is an ``argparse.Namespace`` exposing the attributes ``input``,
    ``output``, ``num_seeds``, ``max_length_ep`` and ``num_episodes``; each one holds
    either the value supplied by the user or the default declared below.
    """

    @staticmethod
    def read_input_args(argv=None):
        """
        Build the argument parser, parse the command line and return the result.

        :param argv: optional list of argument strings to parse. When None (the
                     default) argparse reads the process' own command line.
        :return: argparse.Namespace with every option filled in.
        """
        parser = argparse.ArgumentParser(
            description='Define algorithm\'s parameters.')

        parser.add_argument('-i', '--input', type=str, default='mdps/toy.mdp',
                            help='File containing the MDP definition (default: mdps/toy.mdp).')
        parser.add_argument('-o', '--output', type=str, default='graphs/',
                            help='Prefix that will be used to generate all outputs (default: graphs/).')
        # The help text must advertise the value actually used as default (5).
        parser.add_argument('-s', '--num_seeds', type=int, default=5,
                            help='Number of seeds to be averaged over when appropriate (default: 5).')
        parser.add_argument('-m', '--max_length_ep', type=int, default=100,
                            help='Maximum number of time steps an episode may last (default: 100).')
        parser.add_argument('-n', '--num_episodes', type=int, default=1000,
                            help='Number of episodes in which learning will happen (default: 1000).')

        # Passing None makes argparse fall back to sys.argv[1:].
        return parser.parse_args(argv)
def mean_confidence_interval(data, confidence=0.95):
    """
    Compute the sample mean and a two-sided Student-t confidence interval around it.

    Adapted from:
    https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data

    :param data: samples to summarise. Statistics are taken along axis 0, so a 2-D
                 input with one row per sample yields one interval per column.
    :param confidence: confidence level of the interval (default: 0.95).
    :return: tuple (mean, lower bound, upper bound).
    """
    # Multiplying by 1.0 promotes integer input to floating point.
    samples = 1.0 * np.array(data)
    n = len(samples)

    mean = np.mean(samples, axis=0)
    std_err = scipy.stats.sem(samples, axis=0)

    # Use the public ``ppf`` rather than the private ``_ppf``: the latter skips
    # SciPy's argument validation and is not part of the supported API.
    half_width = std_err * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return mean, mean - half_width, mean + half_width
def epsilon_greedy(q_values, epsilon=0.05):
    """
    Choose an action index epsilon-greedily, breaking ties between maximal values at random.

    :param q_values: list of action values to be greedy with respect to
    :param epsilon: probability of picking a uniformly random action instead of a greedy one
    :return: index of the selected action
    """
    num_actions = len(q_values)

    # Explore: the uniform draw falls below epsilon, so any index may be returned.
    if random.uniform(0, 1) < epsilon:
        return random.randrange(0, num_actions)

    # Exploit: collect every index attaining the maximum and pick one of them
    # uniformly, so that ties are not always resolved in favour of the first.
    best_value = np.max(q_values)
    greedy_actions = [action for action, value in enumerate(q_values)
                      if value == best_value]
    return random.choice(greedy_actions)