-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathidea_opt_ana.m
30 lines (27 loc) · 1.01 KB
/
idea_opt_ana.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
function [tdr_opt_ana] = idea_opt_ana()
% IDEA_OPT_ANA  Analyze the optimal scheme (idealized).
%
%   tdr_opt_ana = idea_opt_ana() runs a backward recursion over slots
%   D, D-1, ..., 1 to build a value function and the matching policy,
%   then returns the dot product of initial_belief with the slot-1
%   column of that value function.
%
%   Side effects: calls idea_opt_sim with the computed policy and prints
%   both the analytical and the simulated TDR figures.
%
%   All inputs come from global variables (the original comment points to
%   main_without_retran.m for their declaration):
%     D               number of slots
%     num_state       number of states
%     num_action      number of actions
%     actions         action labels, indexed by action position
%     initial_belief  weights applied to the slot-1 values
%     transF          indexed as transF(state, :, :) and used as a
%                     num_state-by-num_action matrix
%                     (presumably transition probabilities - confirm)
%     rewards         indexed as rewards(state, :), one entry per action

global D num_state num_action initial_belief actions transF rewards

% value(:, t) : best expected total reward from slot t through slot D.
%               Column D+1 stays at zero (nothing is earned after slot D).
% policy(:, t): label of the maximizing action for every state in slot t.
value  = zeros(num_state, D + 1);
policy = zeros(num_state, D);

for t = D:-1:1
    next_value = value(:, t + 1).';   % row vector of slot t+1 values
    for s = 1:num_state               % s: index of the current state
        % Slice for state s, laid out as (next state) x (action).
        trans_s = reshape(transF(s, :, :), num_state, num_action);

        % One entry per action: immediate reward plus the slot-(t+1)
        % values weighted by the slice above.
        action_value = rewards(s, :) + next_value * trans_s;

        [best_value, best_index] = max(action_value);
        value(s, t)  = best_value;
        policy(s, t) = actions(best_index);   % store the label, not the index
    end
end

% TDR performance: analytical figure, then the one reported by idea_opt_sim
tdr_opt_ana = dot(initial_belief, value(:, 1));
tdr_sim     = idea_opt_sim(policy);

% Report both figures
fprintf("tdr_opt_ana (idea) = %.4f\n", tdr_opt_ana);
fprintf("tdr_opt_sim (idea) = %.4f\n", tdr_sim);