MODEL:
! Principal Components Analysis (PCA);
! The problem:
1) Given a data set in which variables display a significant
amount of multicollinearity, we want to form new variables
that are uncorrelated among themselves (i.e. orthogonal),
2) Given a large data set, we want to reduce the dimension of data
to unity (e.g. composite index construction) with the minimum
loss of information;
! Based on a version by Eren Ocakverdi;
! Keywords: Principal components analysis, Factor analysis, Statistics, PCA;
SETS:
OBS;
! We allow total number of principal components to
be equal to the number of original variables;
VAR: XMEAN, PMEAN, XVAR, PVAR;
VXV( VAR, VAR): W; ! Weights used for each component;
OXV( OBS, VAR): X, PRIN;
ENDSETS
DATA:
! PCA is an appropriate technique for a data set
in which the variables are highly correlated;
OBS= 1..6;
VAR= V1 V2 V3;
X = 10 13 11
32 27 30
33 42 18
16 12 14
26 13 19
33 46 37;
ENDDATA
N = @SIZE(OBS); ! Number of observations in this data set;
! Compute the means and variances of both observed (i.e. original data) and
unobserved variables (i.e. principal components);
@FOR( VAR(J):
XMEAN(J)= @SUM( OBS(I): X(I,J))/N
);
@FOR(VAR(J):
XVAR(J) = @SUM( OBS(I): (X(I,J)-XMEAN(J))^2)/(N-1)
);
@FOR( VAR(J):
PMEAN(J)= @SUM( OBS(I): PRIN(I,J))/N
);
@FOR(VAR(J):
PVAR(J) = @SUM( OBS(I): (PRIN(I,J)-PMEAN(J))^2)/(N-1)
);
! Variances of the principal components correspond to their eigenvalues;
! Principal components are linear combinations of the original variables.
Weights are also known as eigenvectors;
@FOR( OBS(I):
! Each component K is a weighted sum of the original data;
@FOR(VAR(K):
PRIN(I,K) = @SUM( VAR(J): W(K,J)*X(I,J));
! Principal components can either be positive or negative;
@FREE(PRIN(I,K));
);
);
! Weights can either be positive or negative;
@FOR(VXV(K,J): @FREE(W(K,J)));
! First objective is to find the first principal component
that accounts for the maximum variance in the data;
[OBJ] MAX = PVAR(1); ! Or alternatively PVAR(2), PVAR(3), etc.;
! The total variance must remain unchanged;
@SUM( VAR(J): PVAR(J)) = @SUM( VAR(J): XVAR(J));
! Covariances between principal components must be zero
in order to ensure the orthogonality;
@FOR( VAR(J1):
@FOR( VAR(J2) | J2 #GT# J1:
@SUM(OBS(I): (PRIN(I,J1)- PMEAN(J1))*(PRIN(I,J2)-PMEAN(J2))) = 0;
);
);
! Sizes of the weights are constrained to prevent the
variances of components from taking large values;
@FOR( VAR(K):
@SUM( VAR(J): W(K,J)^2) = 1;
);
END
|