Diffing "my code (come on!)" and "Robot Monster"

Title: my code (come on!) Robot Monster
Author: William Song Mike Russell
Submitted: 2010-11-17 09:21:55 UTC 2010-11-17 11:47:14 UTC
Status: Failed Failed
Score: 0.0 0.0
Result: Failed (timeout) Failed (timeout)
CPU Time: 0.0 0.0
Code:
function [thrustRow, thrustCol] = solver(chart, aIndex, bIndex, maxThrottle)
% Basic solver, does nothing

% Copyright 2010 The MathWorks, Inc.
[r,c]=size(chart(:,:,1));
[aRow,aCol]=ind2sub([r,c],aIndex);
[bRow,bCol]=ind2sub([r,c],bIndex);

% define the task and robot
task=struct('initialState',[aRow;aCol;0;0;0],'midState',[bRow;bCol;0;0;0],'terminalState',[aRow;aCol;0;0;1],'scale',[r,c]); % task
robot=struct('alpha',1,'gamma',1,'states',[aRow;aCol;0;0;0],'Qtable',zeros(1,2*maxThrottle*(maxThrottle+1)+1),'best',[],'state',[aRow;aCol;0;0;0],'maxThrottle',maxThrottle);

% Q-learning
while isempty(robot.best)
    robot=Qlearning(robot,task,chart,5000);
    robot=Qlearning(robot,task,chart,10);
end

thrustRow = robot.best(1,:);
thrustCol = robot.best(2,:);



%----------------------------------------------------------Qlearning.m
function robot=Qlearning(robot,task,Env,N)
% Q learning Arithmatic
scale=task.scale;           % load the task and the robot
gam=robot.gamma;
alpha=robot.alpha;
mt=robot.maxThrottle;aN=2*mt*(mt+1)+1;
as=1:aN;
si=task.initialState;
st=task.terminalState;
B=task.midState(1:2);
ss=robot.states;
for lp=1:N
    alp=alpha*exp((1-rem(lp,20))/N);
    Q=robot.Qtable;         % load the new table
    s0=si;
    step=0;
    alist=[];               % action list
   
    while any(s0([1,2,5])~=st([1,2,5]))     
        k0=find(all(repmat(s0,1,size(ss,2))==ss));
        a=policy(k0,Q,as);
        dm=ind2act(a,mt);
        s=exact(s0,dm,Env,B);          % next state

        if any(s(1:2)<1)||any(s(1:2)>scale(:))    % beyond the environment
            Q(k0,a)=-1000; break;
        elseif step>1000              % too many steps(turns)
            Q(k0,a)=-500;
        else                          % feasible
            k=find(all(repmat(s,1,size(ss,2))==ss));
            if isempty(k)             % new state
                v=0;
                Q=[Q;zeros(1,aN)];ss=[ss,s]; 
            else
                a1=policy(k,Q,as);
                v=Q(k,a1);        % V(s)
            end
            r=Rew(s,st,B)-sum(abs(dm))-1;
            D=r+gam*v-Q(k0,a);
            Q(k0,a)=Q(k0,a)+alp*D;    % update Qtable
        end
        alist=[alist,ind2act(a,mt)];
        s0=s;
        step=step+1;
    end
    % update the robot
    robot.Qtable=Q;
    if s0([1,2,5])==st([1,2,5])
        if isempty(robot.best),       % record
            robot.best=alist;
        elseif step<size(robot.best,1),
            robot.best=alist;
        end  
    end     
end
robot.states=ss;

function s=exact(s,dm,Env,B)
if all(s(1:2)==B)&&s(5)==0,s(5)=1;     % s is B
end
dwr=Env(s(1),s(2),1);dwc=Env(s(1),s(2),2);
s(3:4)=s(3:4)+dm+[dwr;dwc];
s(1:2)=s(1:2)+s(3:4);

function r=Rew(s,st,B)
if all(s(1:2)==B)&&s(5)==0,r=1000;
elseif s([1,2,5])==st([1,2,5]),r=2000;
else
    r=0;
end

function dm=ind2act(a,mt)
% index to dm
if a==1,dm=[0;0];
else
[kl,r]=ind2sub([mt*(mt+1)/2,4],a-1);
[k,l]=ind2sub([mt,mt],kl);
if k+l>mt+1,kl=flipud([k;l])-[1;1];
else
    kl=[k;l]-[0;1];
end

dm=[0,-1;1,0]^(r-1)*kl;
end

function a=policy(k,Q,as)
        
qs=Q(k,:);
q=min(qs);
if qs==q,a=1;
else
    as(qs==q)=[];qs(qs==q)=[];
    a=drnd(as,qs-q);
end


%-------------------------------------------drnd.m
function rv=drnd(x,p)
% discrete random variable
% Designer: William Song

p=p/sum(p);

F=cumsum(p);F0=[0,F(1:end-1)];
r=rand(1);
rv=x((F0<=r)&(r<F));