Lindo Systems

MODEL:
! Principal Components Analysis (PCA);
! The problem: 
     1) Given a data set in which variables display a significant
         amount of multicollinearity, we want to form new variables 
         that are uncorrelated among themselves (i.e. orthogonal), 
     2) Given a large data set, we want to reduce the dimension of data
         to unity (e.g. composite index construction) with the minimum 
         loss of information;
 ! Based on a version by Eren Ocakverdi;
 ! Keywords: Principal components analysis, Factor analysis, Statistics, PCA;

SETS:
  OBS;
! We allow total number of principal components to
  be equal to the number of original variables;
  VAR: XMEAN, PMEAN, XVAR, PVAR;
  VXV( VAR, VAR): W; ! Weights used for each component;
  OXV( OBS, VAR): X, PRIN;
ENDSETS

DATA:
! PCA is an appropriate technique for a data set
  in which the variables are highly correlated;
  OBS= 1..6;
  VAR= V1 V2 V3;
   X = 10 13 11
       32 27 30
       33 42 18
       16 12 14
       26 13 19
       33 46 37;
ENDDATA

N = @SIZE(OBS); ! Number of observations in this data set;

! Compute the means and variances of both observed (i.e. original data) and 
   unobserved variables (i.e. principal components);
  @FOR( VAR(J):
    XMEAN(J)= @SUM( OBS(I): X(I,J))/N
      );
  @FOR(VAR(J):
    XVAR(J) = @SUM( OBS(I): (X(I,J)-XMEAN(J))^2)/(N-1)
      );

  @FOR( VAR(J):
    PMEAN(J)= @SUM( OBS(I): PRIN(I,J))/N
      );
  @FOR(VAR(J):
    PVAR(J) = @SUM( OBS(I): (PRIN(I,J)-PMEAN(J))^2)/(N-1)
      );

! Variances of the principal components correspond to their eigenvalues;
! Principal components are linear combinations of the original variables.
   Weights are also known as eigenvectors;
   @FOR( OBS(I):
!  Each component K is a weighted sum of the original data;
     @FOR(VAR(K):
       PRIN(I,K) = @SUM( VAR(J): W(K,J)*X(I,J));
! Principal components can either be positive or negative;
       @FREE(PRIN(I,K));
         );
       );

! Weights can either be positive or negative;
   @FOR(VXV(K,J): @FREE(W(K,J)));

! First objective is to find the first principal component 
   that accounts for the maximum variance in the data;
   [OBJ] MAX = PVAR(1); ! Or alternatively PVAR(2), PVAR(3), etc.;
    
! The total variance must remain unchanged; 
   @SUM( VAR(J): PVAR(J)) = @SUM( VAR(J): XVAR(J)); 

! Covariances between principal components must be zero 
   in order to ensure the orthogonality;
   @FOR( VAR(J1):
     @FOR( VAR(J2) | J2 #GT# J1:
       @SUM(OBS(I): (PRIN(I,J1)- PMEAN(J1))*(PRIN(I,J2)-PMEAN(J2))) = 0;
         );
       );

! Sizes of the weights are constrained to prevent the 
   variances of components from taking large values;  
    @FOR( VAR(K):
      @SUM( VAR(J): W(K,J)^2) = 1;
         );
END