! Discriminant analysis with multiple categories/groups,
via linear optimization.
The problem:
We want to predict a categorical outcome/variable, e.g.,
DryWell vs. OilOnly vs. GasOnly vs. OilGas,
based on one or more explanatory variables,
given a set of observations.
Basic approach: For each item(or observation) i,
compute a score for each category or group c, c = 1, 2, ....
We classify item i to that category c that has
the highest score over all categories.
If xobs(i,j) is the value of item i for feature/variable j,
score(i,c) = beta(c,0) + beta(c,1)*xobs(i,1) + beta(c,2)*xobs(i,2)+...;
! Ref: Gochet, W., A. Stam, V. Srinivasan, and S. Chen (1997),
"Multigroup Discriminant Analysis Using Linear Programming".
Operations Research, vol. 45, no. 2, pp. 213-225;
!Keywords: Categorical regression, Classification, Clustering,
Discriminant analysis, Machine learning, Random forest, Supervised learning;
SETS:
obs; ! The rows/items;
var; ! The columns or "variables" in the statistical sense, dependent variable included;
cat: wgt; ! The categories for the dependent var.
  wgt(c) multiplies the badness terms of category c in the objective of submodel classify;
vardep( var); ! The dependent variable. The CALC section takes its index as idpv;
varexp( var); ! The set of explanatory variables, i.e., the terms summed in score(i,c);
varpltx( var); ! Variable on X axis in plot/chart/graph;
varplty( var); ! Variable on Y axis in plot/chart/graph;
catplt( cat); ! The categories to plot. A chart is drawn only when this set has 1, 2 or 3 members;
oxv(obs,var): xobs; ! The matrix of observations.
  Column idpv holds the category index of each observation;
cxc( cat, cat): prbcls; ! Probability classification.
  prbcls(k1,k2) = fraction of the category k1 observations that get classified as k2;
cxv( cat, var): beta; ! "Regression" coefficients.
  beta(c,idpv) is used as the constant term of the scoring function of category c;
oxc( obs, cat): score; ! score(i,c) = score of observation i under the scoring function of category c;
oxcxc( obs, cat, cat) : badness; ! badness(i,c,c1) = scoring error of observation i,
  which belongs to category c, relative to the rival category c1;
! Subsets useful for doing a 2-dimensional plot.
  The CALC section adds members with @INSERT, one subset per plotted category,
  and stores the plot coordinates in the X and Y attributes;
OBS1( OBS): X1, Y1;
OBS2( OBS): X2, Y2;
OBS3( OBS): X3, Y3;
ENDSETS DATA:
! Various data sets;
! Data set 1;
!wg1 cat = TYPE1 TYPE2 TYPE3; !wg1 wgt = 1 1 1; ! You may want to give more weight to observations of a specific category
if that category is not well represented in the data, or
if it is costly to misclassify that category;
!wg1 var = v1 v2 catgry; !wg1 vardep = catgry; !wg1 varexp = v1 v2 ; !wg1 varpltx = v1;
!wg1 varplty = v2;
! and which categories (up to 3) to plot;
!wg1 catplt = TYPE1 TYPE2 TYPE3;
! The names of the observations;
!wg1 obs = ob11 ob12 ob13 ob21 ob22 ob23 ob31 ob32 ob33;
! The matrix of observations. Note, the dependent variable
should take one of the values: 1, 2, 3, ...;
!wg1 xobs =
1 2 1
2 3 1
2 4 1
4 2 2
2.5 3 2
2.5 4 2
1 1 3
2 2.5 3
4 1 3
;
! Data set 2;
!wg2 cat = CAT1 CAT2 CAT3 CAT4 CAT5 ; !wg2 wgt = 1 1 1 1 1; !wg2 var = v1 v2 catgry; !wg2 vardep = catgry; !wg2 varexp = v1 v2 ; !wg2 varpltx = v1; !wg2 varplty = v2;
! and which categories (up to 3) to plot;
!wg2 catplt = CAT2 CAT3 CAT4 ;
! The names of the observations;
!wg2 obs = ob1..ob20;
! The matrix of observations;
!wg2 xobs =
0 2.6 1
1 3 1
1.4 1.8 1
1.4 2.6 1
0 1 2 !5;
!wg2 0 2 2
1 2 2
2 2 2
1 1 3
2.4 2 3
3 0.6 3
3 1.4 3
1 0 4 !13;
!wg2 2 0 4
2 0.6 4
3 0 4
0 0 5
1 0 5 !18;
!wg2 0 1 5 !wg2 0.4 0.4 5
;
! Data set 3;
!ds3 cat = CAT1 CAT2 CAT3; !ds3 wgt = 1 1 1; !ds3 var = v1 v2 catgry; !ds3 vardep = catgry ; !ds3 varexp = v1 v2; !ds3 varpltx = v1; !ds3 varplty = v2;
! and which categories (up to 3) to plot;
!ds3 catplt = CAT1 CAT2 CAT3;
! The names of the observations;
!ds3 obs = ob11 ob12 ob13 ob14 ob21 ob22 ob23 ob31 ob32;
! The matrix of observations;
!ds3 xobs =
2 3 1
5 6 1
3 8 1
2 1 1
4 6 2
1 3 2
4 4.5 2
2 4 3
4 5 3
;
! Data set 4;
!ds4 cat = CAT1 CAT2; !ds4 wgt = 1 1; !ds4 var = v1 catgry; !ds4 vardep = catgry; !ds4 varexp = v1; ! The names of the observations;
!ds4 obs = ob11 ob22;
! The matrix of observations;
!ds4 xobs =
0 1
0 2
;
! Data set from Shuichi;
!ss cat = GOOD BAD; !ss wgt = 1 1 ; ! Names of variables, including dependent;
!ss var = TEST1 TEST2 TEST3 TEST4 TEST5 TYPE;
!ss vardep = TYPE; !ss varexp = TEST1 TEST2 TEST3 TEST4 TEST5; !ss varpltx = TEST1;
!ss varplty = TEST2;
! and which categories (up to 3) to plot;
!ss catplt = GOOD BAD;
! The names of the observations;
!ss obs = 1..66;
! The matrix of observations;
!ss xobs =
! 1ss 346.7000122 247.1999969 220.5 364.1000061 311.7000122 2
! 2ss 334 313.2999878 306.5 330.8999939 311.1000061 2
! 3ss 248.3999939 189.1999969 206.8000031 334.7000122 312.5 1
! 4ss 309 291.8999939 281.2000122 346.2000122 311.1000061 1
! 5ss 328.8999939 306.2000122 259.3999939 336.3999939 310.8999939 2
! 6ss 252.8000031 248.8000031 253.8000031 321 311.7000122 1
! 7ss 313 289.7000122 292.6000061 318 311 2
! 8ss 304.8999939 115.5 284.2000122 316.5 310.5 1
! 9ss 327.8999939 330.7999878 305.7000122 332.6000061 311 1
!10ss 315.3999939 203.8999939 287.1000061 333.7999878 311.5 1
!11ss 333 270.6000061 274.2999878 379.1000061 311.2000122 2
!12ss 242.3999939 145.8999939 292.2999878 318.7000122 311.2999878 2
!13ss 124.9000015 1.1 244.1999969 345.7000122 310.7999878 2
!14ss 323.5 317.2000122 287.3999939 406.1000061 312 1
!15ss 304.2999878 191.6999969 275.7999878 331.7000122 311.5 2
!16ss 382.3999939 124.0999985 30 322.5 316.7000122 1
!17ss 327 275.3999939 290.6000061 345.5 313.3999939 1
!18ss 278.7999878 282.1000061 316.2999878 317 311.2999878 2
!19ss 324.1000061 261.7999878 316.7999878 326.6000061 311.6000061 2
!20ss 249.3999939 260.7999878 292.7999878 317.2000122 310.2999878 1
!21ss 336.2000122 290.7999878 273.2999878 400.3999939 310.7999878 1
!22ss 317 291.8999939 303.5 326.5 310.8999939 2
!23ss 256.8999939 212 282 336.6000061 311.7000122 1
!24ss 292.7999878 181 295.7999878 577.9000244 311.2999878 2
!25ss 342.7000122 306 294.2000122 487.3999939 312.1000061 2
!26ss 336.7000122 301.2999878 273.7000122 342.5 312.7999878 2
!27ss 302.2999878 250.8000031 297.2000122 331.2999878 312.1000061 2
!28ss 328 296.8999939 292.3999939 324.6000061 310.8999939 1
!29ss 312 272 311.6000061 317.7000122 311.2000122 1
!30ss 274.7000122 252.1000061 310.7000122 323.7000122 310.7999878 2
!31ss 315.1000061 301.2000122 300.8999939 410.8999939 310.8999939 2
!32ss 310 245.3000031 306 310.7000122 310.1000061 2
!33ss 335.2000122 298.6000061 314.7999878 317 310.8999939 1
!34ss 345.2000122 353 326.3999939 409.1000061 311.2999878 1
!35ss 348.7999878 357 326 436.5 311.8999939 2
!36ss 324 306.7000122 314 401.7000122 312.7000122 2
!37ss 365.1000061 345 330.7999878 382.2999878 311.8999939 1
!38ss 369.2999878 356.7000122 322.6000061 1034.1 310.8999939 1
!39ss 343.6000061 330.7999878 322.5 462.7999878 312.3999939 1
!40ss 362.7999878 343 333.6000061 785.9000244 311.5 1
!41ss 355.6000061 336.1000061 320.3999939 597.9000244 312.1000061 2
!42ss 357.3999939 378.6000061 323.7999878 891.2999878 311.6000061 1
!43ss 350 347.2999878 343.3999939 538.7999878 313.5 1
!44ss 379 369 333.1000061 716 315.5 1
!45ss 344.2000122 359.6000061 333.7999878 436.6000061 311.8999939 1
!46ss 357 322.5 317 363.3999939 311.7999878 2
!47ss 325.3999939 347.2999878 344.1000061 880.0999756 311.5 1
!48ss 366.8999939 345.2999878 314.2000122 550.2999878 310.8999939 2
!49ss 353.7999878 359.5 335.1000061 425 312.6000061 2
!50ss 330.7000122 328.1000061 323.5 373.1000061 314 1
!51ss 343.7999878 341.3999939 325.7000122 454.7999878 311.8999939 2
!52ss 345.2999878 331.5 295.6000061 400 311 1
!53ss 334.3999939 318.5 315.7999878 459.1000061 311.5 2
!54ss 358.8999939 350.6000061 315.7999878 392 311.7999878 2
!55ss 359.8999939 344.6000061 336.3999939 620 311.7999878 2
!56ss 364.7999878 329.8999939 336.7000122 549.9000244 312.2999878 1
!57ss 349 327.3999939 322.6000061 370.5 311.2999878 1
!58ss 363 364.7000122 324.6000061 1081.7 311.7000122 1
!59ss 330.1000061 363.5 330.6000061 617.5 311.1000061 1
!60ss 363.7000122 345.8999939 336.3999939 599.5 312 2
!61ss 356.1000061 349.3999939 340.5 1010 311.8999939 1
!62ss 358.2999878 363.1000061 317.1000061 474.3999939 311.8999939 2
!63ss 356.7000122 349.7999878 323.7999878 539.0999756 311.2000122 2
!64ss 370.2999878 369.5 317 536.5999756 312 1
!65ss 327.8999939 326.2999878 330.3999939 415.6000061 311 2
!66ss 334.7000122 331.7000122 302.2000122 428.6000061 311.6000061 2;
! Data set 6, Separable diagonally;
!SepDiag cat = GOOD BAD; !SepDiag wgt = 1 1; !SepDiag var = xcoord ycoord catgry; !SepDiag vardep = catgry; !SepDiag varexp = xcoord ycoord; !SepDiag varpltx = xcoord; !SepDiag varplty = ycoord;
! and which categories (up to 3) to plot;
!SepDiag catplt = GOOD BAD;
! The names of the observations;
!SepDiag obs = ob01 ob02 ob03 ob04 ob05 ob06;
! The matrix of observations;
!SepDiag xobs =
1 3 1
2 2 1
3 1 1
5 7 2
6 6 2
7 5 2
;
!Fisher's Iris data ;
!Iris cat = SETOSA VERSICOLOR VIRGINICA;
!Iris wgt = 1 1 1 ; !Iris var = DUMMY SEPLEN SEPWID PETLEN PETWID SPECIES; !Iris vardep = SPECIES; !Iris varexp = SEPLEN SEPWID PETLEN PETWID ; ! Note, the DUMMY variable is disregarded;
! The two variables to plot;
!Iris varpltx = SEPLEN;
!Iris varplty = PETLEN;
! and which categories (up to 3) to plot;
!Iris !Iris ! catplt = SETOSA VERSICOLOR;
!Iris catplt = SETOSA VERSICOLOR, VIRGINICA;
! The names of the observations;
!Iris obs = 1..150;
! The matrix of observations;
!DatasetOrder SepalLength SepalWidth PetalLength PetalWidth Species;
!Iris xobs =
1 5.1 3.5 1.4 0.2 1
2 4.9 3.0 1.4 0.2 1
3 4.7 3.2 1.3 0.2 1
4 4.6 3.1 1.5 0.2 1
5 5.0 3.6 1.4 0.3 1
6 5.4 3.9 1.7 0.4 1
7 4.6 3.4 1.4 0.3 1
8 5.0 3.4 1.5 0.2 1
9 4.4 2.9 1.4 0.2 1
10 4.9 3.1 1.5 0.1 1
11 5.4 3.7 1.5 0.2 1
12 4.8 3.4 1.6 0.2 1
13 4.8 3.0 1.4 0.1 1
14 4.3 3.0 1.1 0.1 1
15 5.8 4.0 1.2 0.2 1
16 5.7 4.4 1.5 0.4 1
17 5.4 3.9 1.3 0.4 1
18 5.1 3.5 1.4 0.3 1
19 5.7 3.8 1.7 0.3 1
20 5.1 3.8 1.5 0.3 1
21 5.4 3.4 1.7 0.2 1
22 5.1 3.7 1.5 0.4 1
23 4.6 3.6 1.0 0.2 1
24 5.1 3.3 1.7 0.5 1
25 4.8 3.4 1.9 0.2 1
26 5.0 3.0 1.6 0.2 1
27 5.0 3.4 1.6 0.4 1
28 5.2 3.5 1.5 0.2 1
29 5.2 3.4 1.4 0.2 1
30 4.7 3.2 1.6 0.2 1
31 4.8 3.1 1.6 0.2 1
32 5.4 3.4 1.5 0.4 1
33 5.2 4.1 1.5 0.1 1
34 5.5 4.2 1.4 0.2 1
35 4.9 3.1 1.5 0.2 1
36 5.0 3.2 1.2 0.2 1
37 5.5 3.5 1.3 0.2 1
38 4.9 3.6 1.4 0.1 1
39 4.4 3.0 1.3 0.2 1
40 5.1 3.4 1.5 0.2 1
41 5.0 3.5 1.3 0.3 1
42 4.5 2.3 1.3 0.3 1
43 4.4 3.2 1.3 0.2 1
44 5.0 3.5 1.6 0.6 1
45 5.1 3.8 1.9 0.4 1
46 4.8 3.0 1.4 0.3 1
47 5.1 3.8 1.6 0.2 1
48 4.6 3.2 1.4 0.2 1
49 5.3 3.7 1.5 0.2 1
50 5.0 3.3 1.4 0.2 1
51 7.0 3.2 4.7 1.4 2
52 6.4 3.2 4.5 1.5 2
53 6.9 3.1 4.9 1.5 2
54 5.5 2.3 4.0 1.3 2
55 6.5 2.8 4.6 1.5 2
56 5.7 2.8 4.5 1.3 2
57 6.3 3.3 4.7 1.6 2
58 4.9 2.4 3.3 1.0 2
59 6.6 2.9 4.6 1.3 2
60 5.2 2.7 3.9 1.4 2
61 5.0 2.0 3.5 1.0 2
62 5.9 3.0 4.2 1.5 2
63 6.0 2.2 4.0 1.0 2
64 6.1 2.9 4.7 1.4 2
65 5.6 2.9 3.6 1.3 2
66 6.7 3.1 4.4 1.4 2
67 5.6 3.0 4.5 1.5 2
68 5.8 2.7 4.1 1.0 2
69 6.2 2.2 4.5 1.5 2
70 5.6 2.5 3.9 1.1 2
71 5.9 3.2 4.8 1.8 2
72 6.1 2.8 4.0 1.3 2
73 6.3 2.5 4.9 1.5 2
74 6.1 2.8 4.7 1.2 2
75 6.4 2.9 4.3 1.3 2
76 6.6 3.0 4.4 1.4 2
77 6.8 2.8 4.8 1.4 2
78 6.7 3.0 5.0 1.7 2
79 6.0 2.9 4.5 1.5 2
80 5.7 2.6 3.5 1.0 2
81 5.5 2.4 3.8 1.1 2
82 5.5 2.4 3.7 1.0 2
83 5.8 2.7 3.9 1.2 2
84 6.0 2.7 5.1 1.6 2
85 5.4 3.0 4.5 1.5 2
86 6.0 3.4 4.5 1.6 2
87 6.7 3.1 4.7 1.5 2
88 6.3 2.3 4.4 1.3 2
89 5.6 3.0 4.1 1.3 2
90 5.5 2.5 4.0 1.3 2
91 5.5 2.6 4.4 1.2 2
92 6.1 3.0 4.6 1.4 2
93 5.8 2.6 4.0 1.2 2
94 5.0 2.3 3.3 1.0 2
95 5.6 2.7 4.2 1.3 2
96 5.7 3.0 4.2 1.2 2
97 5.7 2.9 4.2 1.3 2
98 6.2 2.9 4.3 1.3 2
99 5.1 2.5 3.0 1.1 2
100 5.7 2.8 4.1 1.3 2
101 6.3 3.3 6.0 2.5 3
102 5.8 2.7 5.1 1.9 3
103 7.1 3.0 5.9 2.1 3
104 6.3 2.9 5.6 1.8 3
105 6.5 3.0 5.8 2.2 3
106 7.6 3.0 6.6 2.1 3
107 4.9 2.5 4.5 1.7 3
108 7.3 2.9 6.3 1.8 3
109 6.7 2.5 5.8 1.8 3
110 7.2 3.6 6.1 2.5 3
111 6.5 3.2 5.1 2.0 3
112 6.4 2.7 5.3 1.9 3
113 6.8 3.0 5.5 2.1 3
114 5.7 2.5 5.0 2.0 3
115 5.8 2.8 5.1 2.4 3
116 6.4 3.2 5.3 2.3 3
117 6.5 3.0 5.5 1.8 3
118 7.7 3.8 6.7 2.2 3
119 7.7 2.6 6.9 2.3 3
120 6.0 2.2 5.0 1.5 3
121 6.9 3.2 5.7 2.3 3
122 5.6 2.8 4.9 2.0 3
123 7.7 2.8 6.7 2.0 3
124 6.3 2.7 4.9 1.8 3
125 6.7 3.3 5.7 2.1 3
126 7.2 3.2 6.0 1.8 3
127 6.2 2.8 4.8 1.8 3
128 6.1 3.0 4.9 1.8 3
129 6.4 2.8 5.6 2.1 3
130 7.2 3.0 5.8 1.6 3
131 7.4 2.8 6.1 1.9 3
132 7.9 3.8 6.4 2.0 3
133 6.4 2.8 5.6 2.2 3
134 6.3 2.8 5.1 1.5 3
135 6.1 2.6 5.6 1.4 3
136 7.7 3.0 6.1 2.3 3
137 6.3 3.4 5.6 2.4 3
138 6.4 3.1 5.5 1.8 3
139 6.0 3.0 4.8 1.8 3
140 6.9 3.1 5.4 2.1 3
141 6.7 3.1 5.6 2.4 3
142 6.9 3.1 5.1 2.3 3
143 5.8 2.7 5.1 1.9 3
144 6.8 3.2 5.9 2.3 3
145 6.7 3.3 5.7 2.5 3
146 6.7 3.0 5.2 2.3 3
147 6.3 2.5 5.0 1.9 3
148 6.5 3.0 5.2 2.0 3
149 6.2 3.4 5.4 2.3 3
150 5.9 3.0 5.1 1.8 3
;
! Various data sets;
!CaCircle cat = TYPEA TYPEB;
!CaCircle wgt = 1 1; !CaCircle var = X1 X2 X1SQ X2SQ type ;
!CaCircle vardep = type;
!CaCircle !CaCircle varexp = X1 X2 X1SQ X2SQ; !Include squared terms!CaCircle varpltx = X1;
!CaCircle varplty = X2;
!CaCircle catplt = TYPEA TYPEB;
!CaCircle xobs =
22.98 21.75 528.30 473.06 2
28.27 24.90 799.05 620.01 1
21.85 22.88 477.42 523.71 2
24.90 28.34 620.01 803.19 1
20.30 23.30 412.09 542.89 2
20.30 29.60 412.09 876.16 1
18.75 22.88 351.56 523.71 2
15.70 28.34 246.49 803.19 1
17.62 21.75 310.30 473.06 2
12.33 24.90 152.09 620.01 1
17.20 20.20 295.84 408.04 2
11.10 20.20 123.21 408.04 1
17.62 18.65 310.30 347.82 2
12.33 15.50 152.09 240.25 1
18.75 17.52 351.56 306.79 2
15.70 12.06 246.49 145.43 1
20.30 17.10 412.09 292.41 2
20.30 10.80 412.09 116.64 1
21.85 17.52 477.42 306.79 2
24.90 12.06 620.01 145.43 1
22.98 18.65 528.30 347.82 2
28.27 15.50 799.05 240.25 1
23.40 20.20 547.56 408.04 2
29.50 20.20 870.25 408.04 1 ;
! Case: Unequal class sizes;
!CaUneq01 cat = TYPEA TYPEB;
!CaUneq01 !CaUneq01 wgt = 1 0.2; ! Wgt to apply to according to class!CaUneq01 var = value type ;
!CaUneq01 vardep = type;
!CaUneq01 varexp = value;
!CaUneq01 varpltx = value;
! and which categories (up to 3) to plot;
!CaUneq01 catplt = TYPEA TYPEB;
!CaUneq01 xobs =
1 1
2 2
3 2
7 2
8 2
9 2;
! Case 2 : Unequal class sizes, not separable;
!CaUneq02 cat = TYPEA TYPEB;
!CaUneq02 !CaUneq02 wgt = 0.2 1;
!CaUneq02 var = value type;
!CaUneq02 vardep = type;
!CaUneq02 varexp = value;
! The two variables to plot;
!CaUneq02 varpltx = value;
! and which categories (up to 3) to plot;
!CaUneq02 catplt = TYPEA TYPEB;
! The matrix of observations;
!CaUneq02 xobs =
1 1
2 1
3 2
7 1
8 1
9 1;
! Another inseparable case;
!CaInsep cat = TYPEA TYPEB;
!CaInsep wgt = 1 1;
!CaInsep var = value type;
!CaInsep vardep = type;
!CaInsep varexp = value;
! The two variables to plot;
!CaInsep varpltx = value;
!CaInsep varplty = value;
! and which categories (up to 3) to plot;
!CaInsep catplt = TYPEA TYPEB;
! The matrix of observations;
!CaInsep xobs =
1 2
2 2
2 1
3 1 ;
! A 2D inseparable case;
!Ca2D cat = RED GREEN;
!Ca2D wgt = 1.1 0.917 ;
!Ca2D var = X1 X2 type;
!Ca2D vardep = type;
!Ca2D varexp = X1 X2;
! The two variables to plot;
!Ca2D varpltx = X1;
!Ca2D varplty = X2;
!Ca2D catplt = RED GREEN;
!Ca2D xobs =
1 2 2
1.1 4 2
2 1 2
2 3 2
2 5 2
2 5.2 1
3 1 1
3 4 1
3.5 3 1
4 0 2
5 2 1 ;
! Illustrate separability and parsimony;
!CaSePar cat = RED GREEN;
!CaSePar wgt = 1 1;
!CaSePar var = X1 X2 type;
!CaSePar vardep = type;
!CaSePar varexp = X1 X2;
!CaSePar varpltx = X1;
!CaSePar varplty = X2;
!CaSePar catplt = RED GREEN;
!CaSePar xobs =
1 8 2
2 6 2
3 9 2
4 7 2
6 3 1
7 1 1
8 4 1
9 2 1 ;
! Genuine and counterfeit banknotes (100 Swiss Francs),
various measurements.
Banknotes BN1 to BN100 are genuine (Good=1),
all others are counterfeit (Good=2).
Dataset courtesy of H. Riedwyl, Bern, Switzerland;
! Active data set: CaSwiss.
  Every assignment is terminated explicitly so that consecutive
  statements cannot run into one another.
  Category names are listed in index order. The Good column of xobs is
  1 for genuine notes and 2 for counterfeit notes, so index 1 must be
  named GOOD and index 2 must be named FAKE, otherwise the reports and
  the chart legend label the genuine notes as fakes;
!CaSwiss; cat = GOOD FAKE;
! Equal weight on the scoring errors of both categories;
!CaSwiss; wgt = 1 1;
! All columns of xobs, the dependent variable comes last;
!CaSwiss; var = Length Left Right Bottom Top Diagonal Good;
!CaSwiss; vardep = Good;
!CaSwiss; varexp = Length Left Right Bottom Top Diagonal;
! The two variables to plot;
!CaSwiss; varpltx = Bottom;
!CaSwiss; varplty = Diagonal;
! and which categories (up to 3) to plot;
!CaSwiss; catplt = GOOD FAKE;
! Observation names and the matrix of observations are read together,
  one banknote per row: name, six measurements, then the category index;
!CaSwiss; OBS, xobs =
BN1 214.8 131.0 131.1 9.0 9.7 141.0 1
BN2 214.6 129.7 129.7 8.1 9.5 141.7 1
BN3 214.8 129.7 129.7 8.7 9.6 142.2 1
BN4 214.8 129.7 129.6 7.5 10.4 142.0 1
BN5 215.0 129.6 129.7 10.4 7.7 141.8 1
BN6 215.7 130.8 130.5 9.0 10.1 141.4 1
BN7 215.5 129.5 129.7 7.9 9.6 141.6 1
BN8 214.5 129.6 129.2 7.2 10.7 141.7 1
BN9 214.9 129.4 129.7 8.2 11.0 141.9 1
BN10 215.2 130.4 130.3 9.2 10.0 140.7 1
BN11 215.3 130.4 130.3 7.9 11.7 141.8 1
BN12 215.1 129.5 129.6 7.7 10.5 142.2 1
BN13 215.2 130.8 129.6 7.9 10.8 141.4 1
BN14 214.7 129.7 129.7 7.7 10.9 141.7 1
BN15 215.1 129.9 129.7 7.7 10.8 141.8 1
BN16 214.5 129.8 129.8 9.3 8.5 141.6 1
BN17 214.6 129.9 130.1 8.2 9.8 141.7 1
BN18 215.0 129.9 129.7 9.0 9.0 141.9 1
BN19 215.2 129.6 129.6 7.4 11.5 141.5 1
BN20 214.7 130.2 129.9 8.6 10.0 141.9 1
BN21 215.0 129.9 129.3 8.4 10.0 141.4 1
BN22 215.6 130.5 130.0 8.1 10.3 141.6 1
BN23 215.3 130.6 130.0 8.4 10.8 141.5 1
BN24 215.7 130.2 130.0 8.7 10.0 141.6 1
BN25 215.1 129.7 129.9 7.4 10.8 141.1 1
BN26 215.3 130.4 130.4 8.0 11.0 142.3 1
BN27 215.5 130.2 130.1 8.9 9.8 142.4 1
BN28 215.1 130.3 130.3 9.8 9.5 141.9 1
BN29 215.1 130.0 130.0 7.4 10.5 141.8 1
BN30 214.8 129.7 129.3 8.3 9.0 142.0 1
BN31 215.2 130.1 129.8 7.9 10.7 141.8 1
BN32 214.8 129.7 129.7 8.6 9.1 142.3 1
BN33 215.0 130.0 129.6 7.7 10.5 140.7 1
BN34 215.6 130.4 130.1 8.4 10.3 141.0 1
BN35 215.9 130.4 130.0 8.9 10.6 141.4 1
BN36 214.6 130.2 130.2 9.4 9.7 141.8 1
BN37 215.5 130.3 130.0 8.4 9.7 141.8 1
BN38 215.3 129.9 129.4 7.9 10.0 142.0 1
BN39 215.3 130.3 130.1 8.5 9.3 142.1 1
BN40 213.9 130.3 129.0 8.1 9.7 141.3 1
BN41 214.4 129.8 129.2 8.9 9.4 142.3 1
BN42 214.8 130.1 129.6 8.8 9.9 140.9 1
BN43 214.9 129.6 129.4 9.3 9.0 141.7 1
BN44 214.9 130.4 129.7 9.0 9.8 140.9 1
BN45 214.8 129.4 129.1 8.2 10.2 141.0 1
BN46 214.3 129.5 129.4 8.3 10.2 141.8 1
BN47 214.8 129.9 129.7 8.3 10.2 141.5 1
BN48 214.8 129.9 129.7 7.3 10.9 142.0 1
BN49 214.6 129.7 129.8 7.9 10.3 141.1 1
BN50 214.5 129.0 129.6 7.8 9.8 142.0 1
BN51 214.6 129.8 129.4 7.2 10.0 141.3 1
BN52 215.3 130.6 130.0 9.5 9.7 141.1 1
BN53 214.5 130.1 130.0 7.8 10.9 140.9 1
BN54 215.4 130.2 130.2 7.6 10.9 141.6 1
BN55 214.5 129.4 129.5 7.9 10.0 141.4 1
BN56 215.2 129.7 129.4 9.2 9.4 142.0 1
BN57 215.7 130.0 129.4 9.2 10.4 141.2 1
BN58 215.0 129.6 129.4 8.8 9.0 141.1 1
BN59 215.1 130.1 129.9 7.9 11.0 141.3 1
BN60 215.1 130.0 129.8 8.2 10.3 141.4 1
BN61 215.1 129.6 129.3 8.3 9.9 141.6 1
BN62 215.3 129.7 129.4 7.5 10.5 141.5 1
BN63 215.4 129.8 129.4 8.0 10.6 141.5 1
BN64 214.5 130.0 129.5 8.0 10.8 141.4 1
BN65 215.0 130.0 129.8 8.6 10.6 141.5 1
BN66 215.2 130.6 130.0 8.8 10.6 140.8 1
BN67 214.6 129.5 129.2 7.7 10.3 141.3 1
BN68 214.8 129.7 129.3 9.1 9.5 141.5 1
BN69 215.1 129.6 129.8 8.6 9.8 141.8 1
BN70 214.9 130.2 130.2 8.0 11.2 139.6 1
BN71 213.8 129.8 129.5 8.4 11.1 140.9 1
BN72 215.2 129.9 129.5 8.2 10.3 141.4 1
BN73 215.0 129.6 130.2 8.7 10.0 141.2 1
BN74 214.4 129.9 129.6 7.5 10.5 141.8 1
BN75 215.2 129.9 129.7 7.2 10.6 142.1 1
BN76 214.1 129.6 129.3 7.6 10.7 141.7 1
BN77 214.9 129.9 130.1 8.8 10.0 141.2 1
BN78 214.6 129.8 129.4 7.4 10.6 141.0 1
BN79 215.2 130.5 129.8 7.9 10.9 140.9 1
BN80 214.6 129.9 129.4 7.9 10.0 141.8 1
BN81 215.1 129.7 129.7 8.6 10.3 140.6 1
BN82 214.9 129.8 129.6 7.5 10.3 141.0 1
BN83 215.2 129.7 129.1 9.0 9.7 141.9 1
BN84 215.2 130.1 129.9 7.9 10.8 141.3 1
BN85 215.4 130.7 130.2 9.0 11.1 141.2 1
BN86 215.1 129.9 129.6 8.9 10.2 141.5 1
BN87 215.2 129.9 129.7 8.7 9.5 141.6 1
BN88 215.0 129.6 129.2 8.4 10.2 142.1 1
BN89 214.9 130.3 129.9 7.4 11.2 141.5 1
BN90 215.0 129.9 129.7 8.0 10.5 142.0 1
BN91 214.7 129.7 129.3 8.6 9.6 141.6 1
BN92 215.4 130.0 129.9 8.5 9.7 141.4 1
BN93 214.9 129.4 129.5 8.2 9.9 141.5 1
BN94 214.5 129.5 129.3 7.4 10.7 141.5 1
BN95 214.7 129.6 129.5 8.3 10.0 142.0 1
BN96 215.6 129.9 129.9 9.0 9.5 141.7 1
BN97 215.0 130.4 130.3 9.1 10.2 141.1 1
BN98 214.4 129.7 129.5 8.0 10.3 141.2 1
BN99 215.1 130.0 129.8 9.1 10.2 141.5 1
BN100 214.7 130.0 129.4 7.8 10.0 141.2 1
BN101 214.4 130.1 130.3 9.7 11.7 139.8 2
BN102 214.9 130.5 130.2 11.0 11.5 139.5 2
BN103 214.9 130.3 130.1 8.7 11.7 140.2 2
BN104 215.0 130.4 130.6 9.9 10.9 140.3 2
BN105 214.7 130.2 130.3 11.8 10.9 139.7 2
BN106 215.0 130.2 130.2 10.6 10.7 139.9 2
BN107 215.3 130.3 130.1 9.3 12.1 140.2 2
BN108 214.8 130.1 130.4 9.8 11.5 139.9 2
BN109 215.0 130.2 129.9 10.0 11.9 139.4 2
BN110 215.2 130.6 130.8 10.4 11.2 140.3 2
BN111 215.2 130.4 130.3 8.0 11.5 139.2 2
BN112 215.1 130.5 130.3 10.6 11.5 140.1 2
BN113 215.4 130.7 131.1 9.7 11.8 140.6 2
BN114 214.9 130.4 129.9 11.4 11.0 139.9 2
BN115 215.1 130.3 130.0 10.6 10.8 139.7 2
BN116 215.5 130.4 130.0 8.2 11.2 139.2 2
BN117 214.7 130.6 130.1 11.8 10.5 139.8 2
BN118 214.7 130.4 130.1 12.1 10.4 139.9 2
BN119 214.8 130.5 130.2 11.0 11.0 140.0 2
BN120 214.4 130.2 129.9 10.1 12.0 139.2 2
BN121 214.8 130.3 130.4 10.1 12.1 139.6 2
BN122 215.1 130.6 130.3 12.3 10.2 139.6 2
BN123 215.3 130.8 131.1 11.6 10.6 140.2 2
BN124 215.1 130.7 130.4 10.5 11.2 139.7 2
BN125 214.7 130.5 130.5 9.9 10.3 140.1 2
BN126 214.9 130.0 130.3 10.2 11.4 139.6 2
BN127 215.0 130.4 130.4 9.4 11.6 140.2 2
BN128 215.5 130.7 130.3 10.2 11.8 140.0 2
BN129 215.1 130.2 130.2 10.1 11.3 140.3 2
BN130 214.5 130.2 130.6 9.8 12.1 139.9 2
BN131 214.3 130.2 130.0 10.7 10.5 139.8 2
BN132 214.5 130.2 129.8 12.3 11.2 139.2 2
BN133 214.9 130.5 130.2 10.6 11.5 139.9 2
BN134 214.6 130.2 130.4 10.5 11.8 139.7 2
BN135 214.2 130.0 130.2 11.0 11.2 139.5 2
BN136 214.8 130.1 130.1 11.9 11.1 139.5 2
BN137 214.6 129.8 130.2 10.7 11.1 139.4 2
BN138 214.9 130.7 130.3 9.3 11.2 138.3 2
BN139 214.6 130.4 130.4 11.3 10.8 139.8 2
BN140 214.5 130.5 130.2 11.8 10.2 139.6 2
BN141 214.8 130.2 130.3 10.0 11.9 139.3 2
BN142 214.7 130.0 129.4 10.2 11.0 139.2 2
BN143 214.6 130.2 130.4 11.2 10.7 139.9 2
BN144 215.0 130.5 130.4 10.6 11.1 139.9 2
BN145 214.5 129.8 129.8 11.4 10.0 139.3 2
BN146 214.9 130.6 130.4 11.9 10.5 139.8 2
BN147 215.0 130.5 130.4 11.4 10.7 139.9 2
BN148 215.3 130.6 130.3 9.3 11.3 138.1 2
BN149 214.7 130.2 130.1 10.7 11.0 139.4 2
BN150 214.9 129.9 130.0 9.9 12.3 139.4 2
BN151 214.9 130.3 129.9 11.9 10.6 139.8 2
BN152 214.6 129.9 129.7 11.9 10.1 139.0 2
BN153 214.6 129.7 129.3 10.4 11.0 139.3 2
BN154 214.5 130.1 130.1 12.1 10.3 139.4 2
BN155 214.5 130.3 130.0 11.0 11.5 139.5 2
BN156 215.1 130.0 130.3 11.6 10.5 139.7 2
BN157 214.2 129.7 129.6 10.3 11.4 139.5 2
BN158 214.4 130.1 130.0 11.3 10.7 139.2 2
BN159 214.8 130.4 130.6 12.5 10.0 139.3 2
BN160 214.6 130.6 130.1 8.1 12.1 137.9 2
BN161 215.6 130.1 129.7 7.4 12.2 138.4 2
BN162 214.9 130.5 130.1 9.9 10.2 138.1 2
BN163 214.6 130.1 130.0 11.5 10.6 139.5 2
BN164 214.7 130.1 130.2 11.6 10.9 139.1 2
BN165 214.3 130.3 130.0 11.4 10.5 139.8 2
BN166 215.1 130.3 130.6 10.3 12.0 139.7 2
BN167 216.3 130.7 130.4 10.0 10.1 138.8 2
BN168 215.6 130.4 130.1 9.6 11.2 138.6 2
BN169 214.8 129.9 129.8 9.6 12.0 139.6 2
BN170 214.9 130.0 129.9 11.4 10.9 139.7 2
BN171 213.9 130.7 130.5 8.7 11.5 137.8 2
BN172 214.2 130.6 130.4 12.0 10.2 139.6 2
BN173 214.8 130.5 130.3 11.8 10.5 139.4 2
BN174 214.8 129.6 130.0 10.4 11.6 139.2 2
BN175 214.8 130.1 130.0 11.4 10.5 139.6 2
BN176 214.9 130.4 130.2 11.9 10.7 139.0 2
BN177 214.3 130.1 130.1 11.6 10.5 139.7 2
BN178 214.5 130.4 130.0 9.9 12.0 139.6 2
BN179 214.8 130.5 130.3 10.2 12.1 139.1 2
BN180 214.5 130.2 130.4 8.2 11.8 137.8 2
BN181 215.0 130.4 130.1 11.4 10.7 139.1 2
BN182 214.8 130.6 130.6 8.0 11.4 138.7 2
BN183 215.0 130.5 130.1 11.0 11.4 139.3 2
BN184 214.6 130.5 130.4 10.1 11.4 139.3 2
BN185 214.7 130.2 130.1 10.7 11.1 139.5 2
BN186 214.7 130.4 130.0 11.5 10.7 139.4 2
BN187 214.5 130.4 130.0 8.0 12.2 138.5 2
BN188 214.8 130.0 129.7 11.4 10.6 139.2 2
BN189 214.8 129.9 130.2 9.6 11.9 139.4 2
BN190 214.6 130.3 130.2 12.7 9.1 139.2 2
BN191 215.1 130.2 129.8 10.2 12.0 139.4 2
BN192 215.4 130.5 130.6 8.8 11.0 138.6 2
BN193 214.7 130.3 130.2 10.8 11.1 139.2 2
BN194 215.0 130.5 130.3 9.6 11.0 138.5 2
BN195 214.9 130.3 130.5 11.6 10.6 139.8 2
BN196 215.0 130.4 130.3 9.9 12.1 139.6 2
BN197 215.1 130.3 129.9 10.3 11.5 139.7 2
BN198 214.8 130.3 130.4 10.6 11.1 140.0 2
BN199 214.7 130.7 130.8 11.2 11.2 139.4 2
BN200 214.3 129.9 129.9 10.2 11.5 139.6 2
;
ENDDATA
SUBMODEL classify:
! Linear program that fits one linear scoring function per category.
  Parameters:
idpv = index of the dependent variable (assigned in the CALC section),
xobs(i,j) = observed value for observation i for
explanatory variable j, j #ne# idpv,
xobs(i,idpv) = index of the category to which observation i belongs;
! Decision variables:
beta(c,j) = coefficient of variable j in the scoring function of category c,
  with beta(c,idpv) playing the role of the constant term,
score(i,c) = score of observation i, using scoring function of category c,
badness(i,c,c1) = scoring error of observation i of category c relative to category c1
;
! The scoring coefficients can be - or +;
@FOR( cxv(c,j): @FREE( beta(c,j)));
! Score of observation i according to category c.
  The dependent variable is not in varexp, so its coefficient slot
  beta(c,idpv) is free to serve as the intercept;
@FOR( oxc( i, c) :
score(i,c) = beta( c, idpv) + @SUM( varexp( j): beta( c, j)*xobs( i, j));
@FREE( score( i, c));
);
! Compute badness, or scoring error, of observation i in category c relative
to all other groups c1. If observation i is in category c,
then score( i, c) should be >= score( i, c1) + 1, for c1 #ne# c,
i.e., we would like a clear gap of at least 1 between categories.
  badness is not declared @FREE, so it keeps the default lower bound of 0
  and is 0 whenever the gap of 1 is achieved;
@FOR( cat( c):
@FOR( cat( c1) | c1 #ne# c:
@FOR( obs( i) | xobs( i, idpv) #eq# c:
badness(i, c, c1) >= ( score( i, c1) - score( i, c) + 1);
! This breaks ties of the type ( 3 - 2 + 1) vs ( 2 - 3 + 1)
in favor of the tie (2.5 - 2.5 + 1). All have total badness of 2.
  The right-hand side equals 2*( score( i, c1) - score( i, c)) + 1, so it
  exceeds the bound above exactly when the rival score is larger than the
  home score, i.e., the penalty slope doubles once the rival wins;
badness( i, c, c1) >= 2*( score( i, c1) - score( i, c) + 0.5);
);
);
);
! minimize total badness or sum of misclassification scoring errors,
  each term weighted by wgt of the home category c.
  The sum runs over every ( i, c, c1) with c #ne# c1, but only the terms with
  observation i in category c appear in a constraint above;
badtot = @SUM( oxcxc( i, c, c1) | c #ne# c1 : wgt( c) * badness( i, c, c1));
min = badtot;
ENDSUBMODEL
CALC:
! Output level (0:verb, 1:terse, 2:only errors, 3:none);
@SET( 'TERSEO', 1);

! Exactly one dependent variable is required. With none, idpv would be
  undefined. With several, the last one listed would silently be used;
@IFC( @SIZE( vardep) #ne# 1:
   @WRITE( 'Error: vardep has ', @SIZE( vardep),
           ' members, exactly 1 is required.', @NEWLINE( 1));
   @STOP( 'vardep must name exactly one dependent variable.');
);
! Get the index number of the dependent var;
@FOR( vardep( j): idpv = j);

! Total number of observations, used in the summary report;
nobs = @SIZE( obs);

! The value xobs( i, idpv) is compared against category indices in the
  submodel and used as a subscript into cat in the reports, so it must be a
  whole number in 1..|cat|. An observation with any other code would be left
  out of every constraint of the fit without notice, so stop instead;
@FOR( obs( i):
   @IFC( xobs( i, idpv) #lt# 1
         #or# xobs( i, idpv) #gt# @SIZE( cat)
         #or# xobs( i, idpv) #ne# @FLOOR( xobs( i, idpv)):
      @WRITE( 'Error: observation ', i, ' has category code ',
              xobs( i, idpv), ', expected a whole number in 1..',
              @SIZE( cat), '.', @NEWLINE( 1));
      @STOP( 'Invalid category code in xobs.');
   );
);

! We can arbitrarily set the betas for one category to 0. Only score
  differences between categories matter, so category 1 is the reference;
@FOR( var( j):
   beta( 1, j) = 0.0;
);

! Fit the scoring functions of the remaining categories;
@SOLVE( classify);
! Summary report, part 1: size of the data set;
@WRITE( nobs,' = number of points/items/observations.', @NEWLINE(1));

! Build the table prbcls( home, predicted).
  First pass: count, for each home category, how often each category wins;
@FOR( cat( r):
   @FOR( cat( s): prbcls( r, s) = 0);
);
@FOR( obs( n):
   ! The predicted category is the one with the highest score.
     The first category seen always becomes the initial winner,
     later ones must score strictly higher to replace it;
   catwin = 0;
   scwin = 0;
   @FOR( cat( r):
      @IFC( score( n, r) #gt# scwin #or# catwin #eq# 0:
         scwin = score( n, r);
         catwin = r;
      );
   );
   ! Tally the observation in the row of its true category;
   cathome = xobs( n, idpv);
   prbcls( cathome, catwin) = 1 + prbcls( cathome, catwin);
);

! Title and column headings;
@WRITE( @NEWLINE(1), ' Prob( Category i Classified as j)', @NEWLINE(1), ' ');
@FOR( cat( s): @WRITE( @FORMAT( cat( s),'11s'),' '));
@WRITE( @NEWLINE(1));

! Second pass: turn each row of counts into fractions and print it.
  A divisor of at least 1 keeps an empty category from dividing by zero;
@FOR( cat( r):
   rowsum = @SMAX( 1, @SUM( cat( s): prbcls( r, s)));
   @WRITE( @FORMAT( cat( r),'11s'),': ');
   @FOR( cat( s):
      prbcls( r, s) = prbcls( r, s)/ rowsum;
      @WRITE( @FORMAT( prbcls( r, s), '5.4f'),' ');
   );
   @WRITE( @NEWLINE(1));
);
@WRITE( @NEWLINE(1));
! Tally how each observation fares against its best rival category:
  strictly correct = home score above the best rival score,
  borderline       = home score at most 1 below the best rival score,
  misclassified    = home score more than 1 below the best rival score;
borderline = 0;
correctpt = 0;
@FOR( obs( n):
   cathome = xobs( n, idpv);
   scorehome = score( n, cathome);
   ! Best score among all categories other than the true one;
   scoreother = @MAX( cat( k) | k #ne# cathome: score( n, k));

   ! Strictly and correctly classified;
   @IFC( scorehome #gt# scoreother:
      correctpt = correctpt + 1;
   );
   ! Not ahead, but within 1 of the best rival;
   @IFC( scorehome #le# scoreother #and# scorehome #ge# scoreother - 1:
      borderline = borderline + 1;
      @WRITE( n, ' is borderline ', scorehome, ' ', scoreother, @NEWLINE(1));
   );
   ! Behind the best rival by more than 1;
   @IFC( scorehome #le# scoreother #and# scorehome #lt# scoreother - 1:
      @WRITE( n, ' is misclassified ', scorehome, ' ', scoreother, @NEWLINE(1));
   );
);
! The misclassified count is whatever remains after the two tallies;
@WRITE( correctpt,' = number strictly and correctly classified.', @NEWLINE(1));
@WRITE( borderline,' = number borderline/ambiguous.', @NEWLINE(1));
@WRITE( nobs - correctpt - borderline,' = number misclassified.', @NEWLINE(1));
! Table 1: the fitted scoring coefficients, one row per category.
  Heading line: a column for the constant, then one column for every
  variable other than the dependent one;
@WRITE('The scoring beta:', @NEWLINE(1), ' Category Constant');
@FOR( var( v) | v #ne# idpv:
   @WRITE( ' ', @FORMAT( var( v),"%11.11s"));
);
@WRITE( @NEWLINE(1));
@FOR( cat( g):
   ! beta( g, idpv) is the constant term of category g;
   @WRITE( @FORMAT( cat( g),"%12.12s"), ' ', @FORMAT( beta( g, idpv), '11.6f'));
   @FOR( var( v) | v #ne# idpv:
      @WRITE(' ', @FORMAT( beta( g, v), '11.6f'));
   );
   @WRITE( @NEWLINE(1));
);

! Table 2: for every observation its true category code followed by
  its score under each category;
@WRITE(@NEWLINE(1),' Scores for each observation:', @NEWLINE(1), ' Obs Cat ');
@FOR( cat( g):
   @WRITE(@FORMAT( cat( g),'11s'),' ');
);
@WRITE(@NEWLINE(1));
@FOR( obs( n):
   @WRITE( @FORMAT( obs( n),'5s'),' ', @FORMAT( xobs( n, idpv),'3.0f'));
   @FOR( cat( g):
      @WRITE( ' ',@FORMAT( score( n, g),'6.2f'));
   );
   @WRITE( @NEWLINE(1));
);
! Prepare to do a two-dimensional plot based on dimensions d1 and d2.
  d1 = index of the variable on the horizontal axis,
  d2 = index of the variable on the vertical axis,
  0  = not specified in the data.
  Both are initialised so that a data set that omits varpltx and/or
  varplty does not leave them undefined when used as subscripts below;
d1 = 0;
d2 = 0;
@FOR( varpltx( j): d1 = j); ! Get var on horizontal scale;
@FOR( varplty( j): d2 = j); ! Get var on vertical scale;
! If only one of the two plot variables was given, use it on both axes;
@IFC( d1 #le# 0:
   d1 = d2;
);
@IFC( d2 #le# 0:
   d2 = d1;
);

! Get the categories to be displayed, in the order listed in catplt.
  0 means the slot is unused;
cat1 = 0;
cat2 = 0;
cat3 = 0;
@FOR( catplt( k):
   @IFC( cat1 #le# 0:
      cat1 = k;
   @ELSE
      @IFC( cat2 #le# 0:
         cat2 = k;
      @ELSE
         cat3 = k;
      );
   );
);

! Categories were requested but there is nothing to put on the axes;
@IFC( d1 #le# 0 #and# @SIZE( catplt) #gt# 0:
   @WRITE( 'No plot variable given in varpltx/varplty, chart skipped.', @NEWLINE( 1));
);

@IFC( d1 #gt# 0:
   ! Create subsets of items for each of the categories cat1, cat2, cat3;
   ! Create set of the type cat1 items, with 2 dimensions in X1, Y1;
   @FOR( OBS( I) | xobs( I, idpv) #EQ# cat1:
      @INSERT( OBS1, I);
      X1( I) = xobs( I, D1);
      Y1( I) = xobs( I, D2);
   );
   ! Create set of the type cat2 items, with 2 dimensions in X2, Y2;
   @FOR( OBS( I) | xobs( I, idpv) #EQ# cat2:
      @INSERT( OBS2, I);
      X2( I) = xobs( I, D1);
      Y2( I) = xobs( I, D2);
   );
   ! Create set of the type cat3 items, with 2 dimensions in X3, Y3;
   @FOR( OBS( I) | xobs( I, idpv) #EQ# cat3:
      @INSERT( OBS3, I);
      X3( I) = xobs( I, D1);
      Y3( I) = xobs( I, D2);
   );

   ! Now do a scatter plot, with one point set per requested category.
     Nothing is drawn when catplt has no members or more than 3;
   @IFC( @SIZE( catplt) #eq# 1:
      @CHARTSCATTER( 'Two-dimensional plot', !Chart title;
       @FORMAT(var( D1),"7s")+' MEASURE', !Legend for X axis;
       @FORMAT(var( D2),"7s")+' MEASURE', !Legend for Y axis;
       @FORMAT(cat( cat1),"7s"), x1, y1); !Point set 1;
   );
   @IFC( @SIZE( catplt) #eq# 2:
      @CHARTSCATTER( 'Two-dimensional plot', !Chart title;
       @FORMAT(var( D1),"7s")+' MEASURE', !Legend for X axis;
       @FORMAT(var( D2),"7s")+' MEASURE', !Legend for Y axis;
       @FORMAT(cat( cat1),"7s"), x1, y1, !Point set 1;
       @FORMAT(cat( cat2),"7s"), x2, y2); !Point set 2;
   );
   @IFC( @SIZE( catplt) #eq# 3:
      @CHARTSCATTER( 'Two-dimensional plot', !Chart title;
       @FORMAT(var( D1),"7s")+' MEASURE', !Legend for X axis;
       @FORMAT(var( D2),"7s")+' MEASURE', !Legend for Y axis;
       @FORMAT(cat( cat1),"7s"), x1, y1, !Point set 1;
       @FORMAT(cat( cat2),"7s"), x2, y2, !Point set 2;
       @FORMAT(cat( cat3),"7s"), x3, y3); !Point set 3;
   );
);
ENDCALC
|