function ae = AEupdate(ae, pop, ccovfac, iterdiag)
% ae = AEupdate(ae, pop, ccovfac=1, iterdiag=0)
% 
% AEupdate (Adaptive Encoding) implements a CMA-like update for a
% linear encoding that is, in principle, applicable to any
% continuous domain iterative (search) algorithm.
% 
% INPUT: 
%   AE: an "adaptive encoding object". If AE is empty, AEupdate 
%      returns a new adaptive encoding "object". ae.B contains
%      the linear "(de-)coding", i.e. the fitness functions  
%      should be called like fitness(ae.B * x). ae.invB is the 
%      respective inverse. 
%   POP: DIM by POPSIZE array of most recent best
%      solutions. DIM is the search space dimensionality, 
%      the number of solutions POPSIZE might well be one. 
%      Only for non-uniform weights (see code below) an ordering
%      of the solutions regarding their fitness is required. 
%   CCOVFAC: factor for learning rate, default is one (about
%      ten times slower than default for CMA-ES). CCOVFAC depends
%      on the algorithm to which AEupdate is applied to and needs 
%      to be identified in advance, see reference. 
%   ITERDIAG: number of initial iterations, where the transformation 
%      is diagonal and therefore no rotation is applied. 
%      iterdiag=-1 invokes 200*DIM/POPSIZE. 
% OUTPUT: new or updated "adaptive encoding object". 
% 
% Example: 
%    x = rand(10,100);        % initial N by pop-size array x
%    opt = myoptim([], x);    % initialize optimizer, to be implemented
%    ae = AEupdate([], x);    % initialize encoding
%    for iter = 1:1e3
%       x = myoptim(opt, x, ae.B); % optimizers iteration step on x
%                                  % using fitness(ae.B * x)
%       x = ae.B * x;         % decode x
%       ae = AEupdate(ae, x); % adapt encoding 
%       x = ae.invB * x;      % encoding has changed, this could be 
%                             % the first row in the loop 
%    end
%    return ae.B * x(:,ibest) % return best solution, "decoded"
%
% REFERENCE: Hansen, N. (2008). Adaptive Encoding: How to Render
%    Search Coordinate System Invariant. In Rudolph et al. (eds.)
%    Parallel Problem Solving from Nature, PPSN X,
%    Proceedings, Springer. http://hal.inria.fr/inria-00287351/en/

version = '0.11'; 
last_change = '09/23/2008'; 

% TODO: * initial B optionally with random matrix? 
%       * find a better mapping of B between iterations? 
%         not sorting by EV but match scalar products 
%         column-wise? 

if nargin < 2 || isempty(pop)
  error('need two arguments, first can be empty');
end
N = size(pop, 1);

if nargin < 3 || isempty(ccovfac)
  ccovfac = 1;
end

if nargin < 4 || isempty(iterdiag)
  iterdiag = 0; 
end

if iterdiag < 0 
  iterdiag = 200*N/size(pop, 2);
end

  % initialize "object" 
  if isempty(ae) 
    % parameter setting
    ae.N = N; 
    ae.mu = size(pop, 2);
    ae.weights = ones(ae.mu, 1) / ae.mu; 
    ae.mucov = ae.mu; % for computing c1 and cmu
    if 11 < 3  % non-uniform weights, assumes a correct ordering of
               % input arguments
      ae.weights = log(ae.mu+1)-log(1:ae.mu)'; 
      ae.weights = ae.weights/sum(ae.weights); 
      ae.mucov = 1/sum(ae.weights.^2); 
    end
    ae.alpha_p = 1;                    % 
    ae.c1 = ccovfac*0.2/(ae.mucov+(N+1.3).^2);
    ae.cmu = ccovfac*0.2*(ae.mucov-2+1/ae.mucov)/(0.2*ae.mucov+(N+2).^2);
    ae.cc = 1/sqrt(N); 
    ae.diagonaliterations = iterdiag; 

    % initialization
    ae.countiter = 0; 
    ae.pc = zeros(N,1); 
    ae.xmean = pop * ae.weights;
    ae.C = eye(N); 
    ae.diagD = ones(N,1); 
    ae.B = eye(N);    % TODO consider optionally arbitrary orthogonal matrix
    ae.Bo = eye(N);   % usually not really necessary
    ae.invB = eye(N); % for first iteration
    return
  end % initialize object
  
  % check consistency
  if N ~= ae.N
    error(['dimensionality changed from ' num2str(ae.N) ' to ' ...
           num2str(N)]);
  end

  % begin 
  ae.countiter = ae.countiter + 1; 
  ae.xold = ae.xmean; 
  ae.xmean = pop * ae.weights;

  arnorm = sqrt(sum((ae.invB*(pop - repmat(ae.xold,1,ae.mu))).^2,1));

  alpha0 = sqrt(N) / sqrt(sum((ae.invB*(ae.xmean-ae.xold)).^2));
  alphai = sqrt(N) ./ arnorm; 
  alphai = sqrt(N) * min(1./median(arnorm), 2./arnorm); 

  % adapt the encoding
  z = alpha0 * (ae.xmean-ae.xold); 

  % for cumulation of z, no use if z is normalized anyway 
  if 11 < 3 && norm(ae.invB*z) < 0.1*norm(ae.invB*ae.pc)
    % in case of fast step size decrease
    % so far did rarely show up! 
    disp(['z is small' num2str([ norm(ae.invB*z) norm(ae.invB*ae.pc)])]);
    z = z / norm(ae.invB*z) * 0.1*norm(ae.invB*ae.pc);
  end
  ae.pc = (1-ae.cc)*ae.pc + sqrt(ae.cc*(2-ae.cc)) * z;

  % S = z * z';
  S = ae.pc * ae.pc'; % 10-D: factor three on fcigar 

  zmu =  repmat(alphai, N,1) .* (pop - repmat(ae.xold, 1, ae.mu));
  Cmu = zmu * diag(ae.weights) * zmu';  
  % trace normalization seemly puts too much emphasis in large EVs, the 
  % small directions die out
  % Mmu = trace(ae.C) / trace(Mmu) * Mmu; 

     % norm_fac = N*norm(invB*ae.pc)^-2; disp(norm_fac)
     %        ae.C = (1-ae.ccov) * ae.C + ae.ccov/ae.mucov * ae.alpha_p * S ...
     %            + ae.ccov * (1-1/ae.mucov) * Cmu;

  ae.C = (1-(ae.c1+ae.cmu)) * ae.C + ae.c1 * ae.alpha_p * S ...
       + ae.cmu * Cmu;
  if ae.countiter <= ae.diagonaliterations
    ae.C = diag(diag(ae.C)); 
    ae.Bo = eye(N); 
    EV = ae.C; % like second output arg of eig() 
  else
    ae.C = (triu(ae.C)+triu(ae.C,1)');
    [ae.Bo, EV] = eig(ae.C);
  end
  [EV idx] = sort(diag(EV)); % vector of eigenvalues
  ae.diagD = sqrt(EV); 
  ae.Bo = ae.Bo(:,idx); 

        if 11 < 3 % just a check: fails to map Bo to corresponding C from above
          idx = randperm(N);
          EV = EV(idx);
          ae.Bo = ae.Bo(:,idx); 
        end
        % B = B*sqrt(EV)*B'; % worse on fcigar as below, because no
                             % additional scaling can be exploited

  ae.B = ae.Bo * diag(ae.diagD);
  ae.invB = diag(1./ae.diagD) * ae.Bo'; 
  if 11 < 3  % orthogonal version versus full B 
    ae.B = ae.Bo;  
    ae.invB = ae.Bo';
  end

% ---------------------------------------------------------------
% Adaptive encoding. To be used under the terms of the GNU General
% Public License (http://www.gnu.org/copyleft/gpl.html).  
% Author (copyright): Nikolaus Hansen, 2008.  
% e-mail: nikolaus.hansen AT inria.fr 
% URL:http://www.bionik.tu-berlin.de/user/niko
% ---------------------------------------------------------------

