| Code: |
function [thrustRow, thrustCol] = solver(chart, aIndex, bIndex, maxThrottle)
% Basic solver, does nothing
% Copyright 2010 The MathWorks, Inc.
[r,c]=size(chart(:,:,1));
[aRow,aCol]=ind2sub([r,c],aIndex);
[bRow,bCol]=ind2sub([r,c],bIndex);
% define the task and robot
task=struct('initialState',[aRow;aCol;0;0;0],'midState',[bRow;bCol;0;0;0],'terminalState',[aRow;aCol;0;0;1],'scale',[r,c]); % task
robot=struct('alpha',1,'gamma',1,'states',[aRow;aCol;0;0;0],'Qtable',zeros(1,2*maxThrottle*(maxThrottle+1)+1),'best',[],'state',[aRow;aCol;0;0;0],'maxThrottle',maxThrottle);
% Q-learning
while isempty(robot.best)
robot=Qlearning(robot,task,chart,5000);
robot=Qlearning(robot,task,chart,10);
end
thrustRow = robot.best(1,:);
thrustCol = robot.best(2,:);
%----------------------------------------------------------Qlearning.m
function robot=Qlearning(robot,task,Env,N)
% Q learning Arithmatic
scale=task.scale; % load the task and the robot
gam=robot.gamma;
alpha=robot.alpha;
mt=robot.maxThrottle;aN=2*mt*(mt+1)+1;
as=1:aN;
si=task.initialState;
st=task.terminalState;
B=task.midState(1:2);
ss=robot.states;
for lp=1:N
alp=alpha*exp((1-rem(lp,20))/N);
Q=robot.Qtable; % load the new table
s0=si;
step=0;
alist=[]; % action list
while any(s0([1,2,5])~=st([1,2,5]))
k0=find(all(repmat(s0,1,size(ss,2))==ss));
a=policy(k0,Q,as);
dm=ind2act(a,mt);
s=exact(s0,dm,Env,B); % next state
if any(s(1:2)<1)||any(s(1:2)>scale(:)) % beyond the environment
Q(k0,a)=-1000; break;
elseif step>1000 % too many steps(turns)
Q(k0,a)=-500;
else % feasible
k=find(all(repmat(s,1,size(ss,2))==ss));
if isempty(k) % new state
v=0;
Q=[Q;zeros(1,aN)];ss=[ss,s];
else
a1=policy(k,Q,as);
v=Q(k,a1); % V(s)
end
r=Rew(s,st,B)-sum(abs(dm))-1;
D=r+gam*v-Q(k0,a);
Q(k0,a)=Q(k0,a)+alp*D; % update Qtable
end
alist=[alist,ind2act(a,mt)];
s0=s;
step=step+1;
end
% update the robot
robot.Qtable=Q;
if s0([1,2,5])==st([1,2,5])
if isempty(robot.best), % record
robot.best=alist;
elseif step<size(robot.best,1),
robot.best=alist;
end
end
end
robot.states=ss;
function s=exact(s,dm,Env,B)
if all(s(1:2)==B)&&s(5)==0,s(5)=1; % s is B
end
dwr=Env(s(1),s(2),1);dwc=Env(s(1),s(2),2);
s(3:4)=s(3:4)+dm+[dwr;dwc];
s(1:2)=s(1:2)+s(3:4);
function r=Rew(s,st,B)
if all(s(1:2)==B)&&s(5)==0,r=1000;
elseif s([1,2,5])==st([1,2,5]),r=2000;
else
r=0;
end
function dm=ind2act(a,mt)
% index to dm
if a==1,dm=[0;0];
else
[kl,r]=ind2sub([mt*(mt+1)/2,4],a-1);
[k,l]=ind2sub([mt,mt],kl);
if k+l>mt+1,kl=flipud([k;l])-[1;1];
else
kl=[k;l]-[0;1];
end
dm=[0,-1;1,0]^(r-1)*kl;
end
function a=policy(k,Q,as)
qs=Q(k,:);
q=min(qs);
if qs==q,a=1;
else
as(qs==q)=[];qs(qs==q)=[];
a=drnd(as,qs-q);
end
%-------------------------------------------drnd.m
function rv=drnd(x,p)
% discrete random variable
% Designer: William Song
p=p/sum(p);
F=cumsum(p);F0=[0,F(1:end-1)];
r=rand(1);
rv=x((F0<=r)&(r<F));
|