-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathidea_myo_ana.m
30 lines (27 loc) · 1015 Bytes
/
idea_myo_ana.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
function [tdr_myo_ana] = idea_myo_ana()
% IDEA_MYO_ANA  Analytical evaluation of the myopic scheme (idealized).
%
%   tdr_myo_ana = idea_myo_ana() evaluates the myopic policy (in every
%   state, pick the action with the largest immediate reward) by backward
%   recursion over slots D, D-1, ..., 1, and returns the expected total
%   reward from slot 1 to slot D weighted by the initial belief.
%
%   The function also calls idea_myo_sim with the myopic actions and
%   prints both the analytical and the simulated TDR figures.
%
%   All inputs are taken from global variables (see main_without_retran.m):
%     D               - number of slots
%     num_state       - number of states
%     initial_belief  - weights applied to the slot-1 values of all states
%     actions         - lookup from action index to action
%     transF          - indexed as transF(state, :, action index); the row
%                       is multiplied with the next-slot state values
%     rewards         - indexed as rewards(state, action index)

global D
global num_state initial_belief actions
global transF rewards

% value(s, t): value of state s at slot t under the myopic policy.
% Column D+1 is never written and therefore stays at zero.
value = zeros(num_state, D+1);

% Last slot: the best immediate reward of each state, and the index of the
% action achieving it. That index is the myopic choice for every slot.
[value(:, D), greedy_idx] = max(rewards(1:num_state, :), [], 2);

% Earlier slots: immediate reward of the myopic action plus the expected
% value of the next slot.
for slot = D-1:-1:1
    next_value = value(:, slot+1);
    for s = 1:num_state
        a = greedy_idx(s);
        value(s, slot) = rewards(s, a) + transF(s, :, a) * next_value;
    end
end

% Translate action indices into the actions themselves.
pi_myo = actions(greedy_idx);

% TDR performance: analytical figure and the simulated counterpart.
tdr_myo_ana = dot(initial_belief, value(:, 1));
tdr_myo_sim = idea_myo_sim(pi_myo);

% Report both figures.
fprintf("tdr_myo_ana (idea) = %.4f\n", tdr_myo_ana);
fprintf("tdr_myo_sim (idea) = %.4f\n", tdr_myo_sim);