% Update the model with the iteration's simulation results.
% Adds one observed transition s -> sp under action a to the transition
% table and the reward r of that transition to the reward table.
% Relies on s, sp, a, i and r being set by the surrounding code.
% NOTE(review): the weight 1/i changes with the iteration counter, so the
% accumulated weights do not sum to 1 over the iterations; if these tables
% are meant to hold relative frequencies / mean rewards, a constant weight
% of 1/(number of iterations) is probably intended -- confirm.
Task.P_s_sp_a(s,sp,a) = Task.P_s_sp_a(s,sp,a) + 1/i;
Task.R_s_a(s,a) = Task.R_s_a(s,a) + r/i;
%% Step 2: Generate the discrete state/action space MDP model
% For every discrete action a and every discrete state s, draw
% Parameters.modeling_iter random continuous states around the point
% X(:,s), simulate one time step from each of them, and accumulate
%   Task.P_s_sp_a(s,sp,a) : relative frequency of landing in state sp
%   Task.R_s_a(s,a)       : mean one-step reward
% NOTE(review): assumes both Task arrays are zero-initialised before this
% section runs, so that each P(s,:,a) sums to 1 afterwards -- confirm
% against the set-up code.
n_samples = Parameters.modeling_iter; % samples drawn per (s,a) pair
for a = Task.A % loop over the actions
    fprintf('Discrete system model for action a = %6.4f \n', U(a));
    action = U(:,a); % inputs (same for every state and sample of this action)
    for s = Task.S % loop over states
        % Continuous point associated with discrete state s. These are kept
        % in their own variables so the simulator outputs below cannot
        % overwrite them; otherwise later samples would be drawn around
        % the previous successor state instead of around s.
        p_c = X(1,s);
        v_c = X(2,s);
        for i = 1:n_samples % loop over modeling iterations
            % Uniform random perturbation of up to half of delta_x / delta_v
            % in either direction around (p_c, v_c).
            p0 = p_c + (rand - 0.5)*delta_x; % position
            v0 = v_c + (rand - 0.5)*delta_v; % velocity
            % Simulate for one time step. This function inputs and returns
            % states expressed by their physical continuous values.
            % isTerminalState is not needed in this scope.
            [p1,v1,r,isTerminalState] = Mountain_Car_Single_Step(p0,v0,action);
            % Convert the successor state (p1, v1) to its discrete index.
            sp = state_c2d([p1; v1]);
            % Count how many times sp is reached from s. A constant weight
            % of 1/n_samples per sample makes P a relative frequency and R
            % an average; a weight of 1/i would sum to the harmonic number
            % H_n instead of 1.
            Task.P_s_sp_a(s,sp,a) = Task.P_s_sp_a(s,sp,a) + 1/n_samples;
            Task.R_s_a(s,a) = Task.R_s_a(s,a) + r/n_samples;
        end % modeling_iter
    end
end