function Gene=HUGO102(Segments)
%Done, at last...
%I know that this can not compete with the top entries, but the idea
%has kept me occupied the last few days (and nights...)
%There is still a lot of tweaking to be done here, but there is
%unfortunatelly no time left in the contest.
%I specifically opted for a very clear and readable programming style
%in expense of speed, because the idea behind this algorithm is
%pretty involved...
%Try to read it and see what you make of it.
%
%Perfomance is not so good, but acceptable given the time limits...
%The main weakness I think lies in the fact that there is an exponential
%number of trees to choose from (see the function MatchSegments) and
%because of time considerations you are forced to pick only one.
%The other possibility is of course a bug of some kind....
%
%
%Kristoffer Lindstrom
%konst@math.chalmers.se
rand('state',100000);
Segments=unique(Segments,'rows');
Segments=uint8(Segments);
AggregateCell={};
SegLength=size(Segments,2);
RemainingSegments=[];
for MatchLength=SegLength1:1:1
if ~isempty(Segments)
%Match the segments
[AggregateSubCell,RemainingSegments]=MatchSegments(Segments,MatchLength);
%Concatenate the old and new cell arrays
AggregateCell={AggregateCell{:} AggregateSubCell{:}};
end
if ~isempty(AggregateCell)
%Now match the aggregates
AggregateCell=MatchAggregates(AggregateCell,MatchLength);
end
Segments=RemainingSegments;
end
Gene=[AggregateCell{:}];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [AggregateCell,RemainingSegments]=MatchSegments(Segments,MatchLength)
%This function takes as input a list of segments and a matching length
%and returns a cell array consisting of segment matchings that have maximal
%length + a list of unmatched segments.
AggregateCell={};
NumberOfSegs=size(Segments,1);
DeleteNodes=zeros(NumberOfSegs,1);
SegTails=Segments(:,(endMatchLength+1):end);
SegHeads=Segments(:,1:MatchLength);
Tree=zeros(1,NumberOfSegs);
%Find which pairs of segments match and build a tree
for EachSegment=1:NumberOfSegs
Hit=all(repmat(SegTails(EachSegment,:),NumberOfSegs,1)==SegHeads,2);
%Don't find yourself
Hit(EachSegment)=0;
Match=find(Hit);
%The number of trees is too big. Pick a tree at random
r=randperm(length(Match));
if r
Tree(Match(r(1)))=EachSegment;
end
end
MaxComponents=MaxTreeComps(Tree);
for EachLeaf=find(MaxComponents)
Node=EachLeaf;
Aggregate=Segments(EachLeaf,:);
DeleteNodes(EachLeaf)=1;
%Trace the leaf back to the root
for i=1:MaxComponents(EachLeaf)
Node=Tree(Node);
%Put together all the segments in the tree
Aggregate=[Segments(Node,1:endMatchLength) Aggregate];
%Delete node from the list of segments
DeleteNodes(Node)=1;
end
if ~isempty(Aggregate)
AggregateCell={AggregateCell{:} Aggregate};
end
end
RemainingSegments=Segments(~DeleteNodes,:);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function AggregateCellOut=MatchAggregates(AggregateCellIn,MatchLength)
%This function takes as input a cell array of aggregates and a matching length
%and returns a cell array consisting of aggregate matchings that have maximal
%length.
%This is actually just a copy of the function MatchSegments, with the
%nessesary modifications to make work for cell arrays
AggregateCellOut=AggregateCellIn;
AggregateCell={};
NumberOfSegs=length(AggregateCellIn);
DeleteNodes=zeros(NumberOfSegs,1);
for EachCell=1:NumberOfSegs
SegTails(EachCell,:)=AggregateCellIn{EachCell}(:,endMatchLength+1:end);
SegHeads(EachCell,:)=AggregateCellIn{EachCell}(:,1:MatchLength);
end
Tree=zeros(1,NumberOfSegs);
%Find which pairs of segments match and build a tree
for EachSegment=1:NumberOfSegs
Hit=all(repmat(SegTails(EachSegment,:),NumberOfSegs,1)==SegHeads,2);
%Don't find yourself
Hit(EachSegment)=0;
Match=find(Hit);
%The number of trees is too big. Pick a tree at random
r=randperm(length(Match));
if r
Tree(Match(r(1)))=EachSegment;
end
end
MaxComponents=MaxTreeComps(Tree);
for EachLeaf=find(MaxComponents)
Node=EachLeaf;
Aggregate=AggregateCellIn{EachLeaf};
DeleteNodes(EachLeaf)=1;
%Trace the leaf back to the root
for i=1:MaxComponents(EachLeaf)
Node=Tree(Node);
%Put together all the segments in the tree
%Aggregate=[Segments(Node,1:endMatchLength) Aggregate];
Aggregate=[AggregateCellIn{Node}(1:endMatchLength) Aggregate];
%Delete node from the list of segments
DeleteNodes(Node)=1;
end
if ~isempty(Aggregate)
AggregateCell={AggregateCell{:} Aggregate};
end
end
RemainingAggregates=AggregateCellIn(~DeleteNodes);
AggregateCellOut={AggregateCell{:} RemainingAggregates{:}};
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [MaxComponents,IsARoot]=MaxTreeComps(Tree)
%Calculates MaxComponents which is a vector of tree nodes s.t. node i
%is a leaf and MaxComponents(i) is the height of the tree with leaf i.
%The leafs are guaranteed not to belong to the same connected component.
TreeLength=length(Tree);
IsARoot=~Tree;
Parents=Tree;
Parents(~Parents)=TreeLength+1;
IsALeaf=ones(1,TreeLength+1);
IsALeaf(Parents)=zeros(1,TreeLength);
IsALeaf(end)=[];
LeafHeights=zeros(1,TreeLength);
Leaf2Root=zeros(1,TreeLength);
%For each leaf, find its root and height
for EachLeaf=find(IsALeaf)
NodeIndex=EachLeaf;
Height=0;
TempTree=Tree;
TempIndex=[];
%Backpropagate from a leaf until you reach a root or a cycle
while TempTree(NodeIndex)
Height=Height+1;
TempIndex=NodeIndex;
NodeIndex=TempTree(NodeIndex);
%Delete every visited node to avoid cycles
TempTree(TempIndex)=0;
end
LeafHeights(EachLeaf)=Height;
if IsARoot(NodeIndex)
else
IsARoot(TempIndex)=1;
end
Leaf2Root(EachLeaf)=NodeIndex;
end
MaxComponents=zeros(1,TreeLength);
for EachRoot=find(IsARoot)
%Find the leafs that have the same root
ConnectedComponent=find(Leaf2Root==EachRoot);
%Among these, find the leaf with the maximum height
ComponentHeights=zeros(1,TreeLength);
ComponentHeights(ConnectedComponent)=LeafHeights(ConnectedComponent);
[MaxHeight,MaxLeaf]=max(ComponentHeights);
%These are the leafs with the maximum height that belong to different components
MaxComponents(MaxLeaf)=MaxHeight;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
