! Discriminant analysis with multiple categories/groups,
 via linear optimization.
  The problem:
    We want to predict a categorical outcome/variable, e.g.,
  DryWell vs. OilOnly vs. GasOnly vs. OilGas,
 based on one or more explanatory variables,
 given a set of observations.
  Basic approach: For each item (or observation) i,
 compute a score for each category or group c, c = 1, 2, ....
 We classify item i into the category c that has
 the highest score over all categories.
 If xobs(i,j) is the value of item i for feature/variable j,
 score(i,c) = beta(c,0) + beta(c,1)*xobs(i,1) + beta(c,2)*xobs(i,2)+...
 In the model below the constant beta(c,0) is stored in beta(c,idpv),
 the slot of the dependent variable, which is not used as a slope;
! Ref: Gochet, W.,  A. Stam, V. Srinivasan, and S. Chen (1997),
"Multigroup Discriminant Analysis Using Linear Programming".
Operations Research, vol. 45, no. 2, pp. 213-225;
!Keywords: Categorical regression, Classification, Clustering,
  Discriminant analysis, Machine learning, Random forest, Supervised learning;
SETS:
! Primitive sets. Their members are listed in the DATA section;
 obs;                 ! Observations, i.e. the rows/items;
 var;                 ! Columns, or "variables" in the statistical sense;
 cat: wgt;            ! Categories of the dependent variable, wgt(c) scales the badness of category c in the objective;

! Subsets that assign roles to variables and categories;
 vardep( var);        ! The dependent variable;
 varexp( var);        ! The explanatory variables;
 varpltx( var);       ! Variable shown on the X axis of the chart;
 varplty( var);       ! Variable shown on the Y axis of the chart;
 catplt( cat);        ! Categories shown in the chart;

! Derived sets holding the data, the coefficients and the results;
 oxv( obs, var): xobs;              ! Matrix of observations;
 cxc( cat, cat): prbcls;            ! Classification counts, later scaled to row proportions;
 cxv( cat, var): beta;              ! "Regression" coefficients of each scoring function;
 oxc( obs, cat): score;             ! Score of each observation under each category;
 oxcxc( obs, cat, cat): badness;    ! Scoring error of an observation, home category versus another category;

! Subsets of obs, filled in the CALC section, one per plotted category, with plot coordinates;
 obs1( obs): x1, y1;
 obs2( obs): x2, y2;
 obs3( obs): x3, y3;
ENDSETS
DATA: ! Various data sets; ! Data set 1; !wg1 cat = TYPE1 TYPE2 TYPE3; !wg1 wgt = 1 1 1; ! You may want to give more weight to observations of a specific category if that category is not well represented in the data, or if it is costly to misclassify that category; !wg1 var = v1 v2 catgry; !wg1 vardep = catgry; !wg1 varexp = v1 v2 ; !wg1 varpltx = v1; !wg1 varplty = v2; ! and which categories (up to 3) to plot; !wg1 catplt = TYPE1 TYPE2 TYPE3; ! The names of the observations; !wg1 obs = ob11 ob12 ob13 ob21 ob22 ob23 ob31 ob32 ob33; ! The matrix of observations. Note, the dependent variable should take one of the values: 1, 2, 3, ...; !wg1 xobs = 1 2 1 2 3 1 2 4 1 4 2 2 2.5 3 2 2.5 4 2 1 1 3 2 2.5 3 4 1 3 ; ! Data set 2; !wg2 cat = CAT1 CAT2 CAT3 CAT4 CAT5 ; !wg2 wgt = 1 1 1 1 1; !wg2 var = v1 v2 catgry; !wg2 vardep = catgry; !wg2 varexp = v1 v2 ; !wg2 varpltx = v1; !wg2 varplty = v2; ! and which categories (up to 3) to plot; !wg2 catplt = CAT2 CAT3 CAT4 ; ! The names of the observations; !wg2 obs = ob1..ob20; ! The matrix of observations; !wg2 xobs = 0 2.6 1 1 3 1 1.4 1.8 1 1.4 2.6 1 0 1 2 !5; !wg2 0 2 2 1 2 2 2 2 2 1 1 3 2.4 2 3 3 0.6 3 3 1.4 3 1 0 4 !13; !wg2 2 0 4 2 0.6 4 3 0 4 0 0 5 1 0 5 !18; !wg2 0 1 5 !wg2 0.4 0.4 5 ; ! Data set 3; !ds3 cat = CAT1 CAT2 CAT3; !ds3 wgt = 1 1 1; !ds3 var = v1 v2 catgry; !ds3 vardep = catgry ; !ds3 varexp = v1 v2; !ds3 varpltx = v1; !ds3 varplty = v2; ! and which categories (up to 3) to plot; !ds3 catplt = CAT1 CAT2 CAT3; ! The names of the observations; !ds3 obs = ob11 ob12 ob13 ob14 ob21 ob22 ob23 ob31 ob32; ! The matrix of observations; !ds3 xobs = 2 3 1 5 6 1 3 8 1 2 1 1 4 6 2 1 3 2 4 4.5 2 2 4 3 4 5 3 ; ! Data set 4; !ds4 cat = CAT1 CAT2; !ds4 wgt = 1 1; !ds4 var = v1 catgry; !ds4 vardep = catgry; !ds4 varexp = v1; ! The names of the observations; !ds4 obs = ob11 ob22; ! The matrix of observations; !ds4 xobs = 0 1 0 2 ; ! Data set from Shuichi; !ss cat = GOOD BAD; !ss wgt = 1 1 ; ! 
Names of variables, including dependent; !ss var = TEST1 TEST2 TEST3 TEST4 TEST5 TYPE; !ss vardep = TYPE; !ss varexp = TEST1 TEST2 TEST3 TEST4 TEST5; !ss varpltx = TEST1; !ss varplty = TEST2; ! and which categories (up to 3) to plot; !ss catplt = GOOD BAD; ! The names of the observations; !ss obs = 1..66; ! The matrix of observations; !ss xobs = ! 1ss 346.7000122 247.1999969 220.5 364.1000061 311.7000122 2 ! 2ss 334 313.2999878 306.5 330.8999939 311.1000061 2 ! 3ss 248.3999939 189.1999969 206.8000031 334.7000122 312.5 1 ! 4ss 309 291.8999939 281.2000122 346.2000122 311.1000061 1 ! 5ss 328.8999939 306.2000122 259.3999939 336.3999939 310.8999939 2 ! 6ss 252.8000031 248.8000031 253.8000031 321 311.7000122 1 ! 7ss 313 289.7000122 292.6000061 318 311 2 ! 8ss 304.8999939 115.5 284.2000122 316.5 310.5 1 ! 9ss 327.8999939 330.7999878 305.7000122 332.6000061 311 1 !10ss 315.3999939 203.8999939 287.1000061 333.7999878 311.5 1 !11ss 333 270.6000061 274.2999878 379.1000061 311.2000122 2 !12ss 242.3999939 145.8999939 292.2999878 318.7000122 311.2999878 2 !13ss 124.9000015 1.1 244.1999969 345.7000122 310.7999878 2 !14ss 323.5 317.2000122 287.3999939 406.1000061 312 1 !15ss 304.2999878 191.6999969 275.7999878 331.7000122 311.5 2 !16ss 382.3999939 124.0999985 30 322.5 316.7000122 1 !17ss 327 275.3999939 290.6000061 345.5 313.3999939 1 !18ss 278.7999878 282.1000061 316.2999878 317 311.2999878 2 !19ss 324.1000061 261.7999878 316.7999878 326.6000061 311.6000061 2 !20ss 249.3999939 260.7999878 292.7999878 317.2000122 310.2999878 1 !21ss 336.2000122 290.7999878 273.2999878 400.3999939 310.7999878 1 !22ss 317 291.8999939 303.5 326.5 310.8999939 2 !23ss 256.8999939 212 282 336.6000061 311.7000122 1 !24ss 292.7999878 181 295.7999878 577.9000244 311.2999878 2 !25ss 342.7000122 306 294.2000122 487.3999939 312.1000061 2 !26ss 336.7000122 301.2999878 273.7000122 342.5 312.7999878 2 !27ss 302.2999878 250.8000031 297.2000122 331.2999878 312.1000061 2 !28ss 328 296.8999939 292.3999939 
324.6000061 310.8999939 1 !29ss 312 272 311.6000061 317.7000122 311.2000122 1 !30ss 274.7000122 252.1000061 310.7000122 323.7000122 310.7999878 2 !31ss 315.1000061 301.2000122 300.8999939 410.8999939 310.8999939 2 !32ss 310 245.3000031 306 310.7000122 310.1000061 2 !33ss 335.2000122 298.6000061 314.7999878 317 310.8999939 1 !34ss 345.2000122 353 326.3999939 409.1000061 311.2999878 1 !35ss 348.7999878 357 326 436.5 311.8999939 2 !36ss 324 306.7000122 314 401.7000122 312.7000122 2 !37ss 365.1000061 345 330.7999878 382.2999878 311.8999939 1 !38ss 369.2999878 356.7000122 322.6000061 1034.1 310.8999939 1 !39ss 343.6000061 330.7999878 322.5 462.7999878 312.3999939 1 !40ss 362.7999878 343 333.6000061 785.9000244 311.5 1 !41ss 355.6000061 336.1000061 320.3999939 597.9000244 312.1000061 2 !42ss 357.3999939 378.6000061 323.7999878 891.2999878 311.6000061 1 !43ss 350 347.2999878 343.3999939 538.7999878 313.5 1 !44ss 379 369 333.1000061 716 315.5 1 !45ss 344.2000122 359.6000061 333.7999878 436.6000061 311.8999939 1 !46ss 357 322.5 317 363.3999939 311.7999878 2 !47ss 325.3999939 347.2999878 344.1000061 880.0999756 311.5 1 !48ss 366.8999939 345.2999878 314.2000122 550.2999878 310.8999939 2 !49ss 353.7999878 359.5 335.1000061 425 312.6000061 2 !50ss 330.7000122 328.1000061 323.5 373.1000061 314 1 !51ss 343.7999878 341.3999939 325.7000122 454.7999878 311.8999939 2 !52ss 345.2999878 331.5 295.6000061 400 311 1 !53ss 334.3999939 318.5 315.7999878 459.1000061 311.5 2 !54ss 358.8999939 350.6000061 315.7999878 392 311.7999878 2 !55ss 359.8999939 344.6000061 336.3999939 620 311.7999878 2 !56ss 364.7999878 329.8999939 336.7000122 549.9000244 312.2999878 1 !57ss 349 327.3999939 322.6000061 370.5 311.2999878 1 !58ss 363 364.7000122 324.6000061 1081.7 311.7000122 1 !59ss 330.1000061 363.5 330.6000061 617.5 311.1000061 1 !60ss 363.7000122 345.8999939 336.3999939 599.5 312 2 !61ss 356.1000061 349.3999939 340.5 1010 311.8999939 1 !62ss 358.2999878 363.1000061 317.1000061 474.3999939 
311.8999939 2 !63ss 356.7000122 349.7999878 323.7999878 539.0999756 311.2000122 2 !64ss 370.2999878 369.5 317 536.5999756 312 1 !65ss 327.8999939 326.2999878 330.3999939 415.6000061 311 2 !66ss 334.7000122 331.7000122 302.2000122 428.6000061 311.6000061 2; ! Data set 6, Separable diagonally; !SepDiag cat = GOOD BAD; !SepDiag wgt = 1 1; !SepDiag var = xcoord ycoord catgry; !SepDiag vardep = catgry; !SepDiag varexp = xcoord ycoord; !SepDiag varpltx = xcoord; !SepDiag varplty = ycoord; ! and which categories (up to 3) to plot; !SepDiag catplt = GOOD BAD; ! The names of the observations; !SepDiag obs = ob01 ob02 ob03 ob04 ob05 ob06; ! The matrix of observations; !SepDiag xobs = 1 3 1 2 2 1 3 1 1 5 7 2 6 6 2 7 5 2 ; !Fisher's Iris data ; !Iris cat = SETOSA VERSICOLOR VIRGINICA; !Iris wgt = 1 1 1 ; !Iris var = DUMMY SEPLEN SEPWID PETLEN PETWID SPECIES; !Iris vardep = SPECIES; !Iris varexp = SEPLEN SEPWID PETLEN PETWID ; ! Note, the DUMMY variable is disregarded; ! The two variables to plot; !Iris varpltx = SEPLEN; !Iris varplty = PETLEN; ! and which categories (up to 3) to plot; !Iris !Iris ! catplt = SETOSA VERSICOLOR; !Iris catplt = SETOSA VERSICOLOR, VIRGINICA; ! The names of the observations; !Iris obs = 1..150; ! 
The matrix of observations; !DatasetOrder SepalLength SepalWidth PetalLength PetalWidth Species; !Iris xobs = 1 5.1 3.5 1.4 0.2 1 2 4.9 3.0 1.4 0.2 1 3 4.7 3.2 1.3 0.2 1 4 4.6 3.1 1.5 0.2 1 5 5.0 3.6 1.4 0.3 1 6 5.4 3.9 1.7 0.4 1 7 4.6 3.4 1.4 0.3 1 8 5.0 3.4 1.5 0.2 1 9 4.4 2.9 1.4 0.2 1 10 4.9 3.1 1.5 0.1 1 11 5.4 3.7 1.5 0.2 1 12 4.8 3.4 1.6 0.2 1 13 4.8 3.0 1.4 0.1 1 14 4.3 3.0 1.1 0.1 1 15 5.8 4.0 1.2 0.2 1 16 5.7 4.4 1.5 0.4 1 17 5.4 3.9 1.3 0.4 1 18 5.1 3.5 1.4 0.3 1 19 5.7 3.8 1.7 0.3 1 20 5.1 3.8 1.5 0.3 1 21 5.4 3.4 1.7 0.2 1 22 5.1 3.7 1.5 0.4 1 23 4.6 3.6 1.0 0.2 1 24 5.1 3.3 1.7 0.5 1 25 4.8 3.4 1.9 0.2 1 26 5.0 3.0 1.6 0.2 1 27 5.0 3.4 1.6 0.4 1 28 5.2 3.5 1.5 0.2 1 29 5.2 3.4 1.4 0.2 1 30 4.7 3.2 1.6 0.2 1 31 4.8 3.1 1.6 0.2 1 32 5.4 3.4 1.5 0.4 1 33 5.2 4.1 1.5 0.1 1 34 5.5 4.2 1.4 0.2 1 35 4.9 3.1 1.5 0.2 1 36 5.0 3.2 1.2 0.2 1 37 5.5 3.5 1.3 0.2 1 38 4.9 3.6 1.4 0.1 1 39 4.4 3.0 1.3 0.2 1 40 5.1 3.4 1.5 0.2 1 41 5.0 3.5 1.3 0.3 1 42 4.5 2.3 1.3 0.3 1 43 4.4 3.2 1.3 0.2 1 44 5.0 3.5 1.6 0.6 1 45 5.1 3.8 1.9 0.4 1 46 4.8 3.0 1.4 0.3 1 47 5.1 3.8 1.6 0.2 1 48 4.6 3.2 1.4 0.2 1 49 5.3 3.7 1.5 0.2 1 50 5.0 3.3 1.4 0.2 1 51 7.0 3.2 4.7 1.4 2 52 6.4 3.2 4.5 1.5 2 53 6.9 3.1 4.9 1.5 2 54 5.5 2.3 4.0 1.3 2 55 6.5 2.8 4.6 1.5 2 56 5.7 2.8 4.5 1.3 2 57 6.3 3.3 4.7 1.6 2 58 4.9 2.4 3.3 1.0 2 59 6.6 2.9 4.6 1.3 2 60 5.2 2.7 3.9 1.4 2 61 5.0 2.0 3.5 1.0 2 62 5.9 3.0 4.2 1.5 2 63 6.0 2.2 4.0 1.0 2 64 6.1 2.9 4.7 1.4 2 65 5.6 2.9 3.6 1.3 2 66 6.7 3.1 4.4 1.4 2 67 5.6 3.0 4.5 1.5 2 68 5.8 2.7 4.1 1.0 2 69 6.2 2.2 4.5 1.5 2 70 5.6 2.5 3.9 1.1 2 71 5.9 3.2 4.8 1.8 2 72 6.1 2.8 4.0 1.3 2 73 6.3 2.5 4.9 1.5 2 74 6.1 2.8 4.7 1.2 2 75 6.4 2.9 4.3 1.3 2 76 6.6 3.0 4.4 1.4 2 77 6.8 2.8 4.8 1.4 2 78 6.7 3.0 5.0 1.7 2 79 6.0 2.9 4.5 1.5 2 80 5.7 2.6 3.5 1.0 2 81 5.5 2.4 3.8 1.1 2 82 5.5 2.4 3.7 1.0 2 83 5.8 2.7 3.9 1.2 2 84 6.0 2.7 5.1 1.6 2 85 5.4 3.0 4.5 1.5 2 86 6.0 3.4 4.5 1.6 2 87 6.7 3.1 4.7 1.5 2 88 6.3 2.3 4.4 1.3 2 89 5.6 3.0 4.1 1.3 2 90 5.5 2.5 4.0 1.3 2 91 5.5 
2.6 4.4 1.2 2 92 6.1 3.0 4.6 1.4 2 93 5.8 2.6 4.0 1.2 2 94 5.0 2.3 3.3 1.0 2 95 5.6 2.7 4.2 1.3 2 96 5.7 3.0 4.2 1.2 2 97 5.7 2.9 4.2 1.3 2 98 6.2 2.9 4.3 1.3 2 99 5.1 2.5 3.0 1.1 2 100 5.7 2.8 4.1 1.3 2 101 6.3 3.3 6.0 2.5 3 102 5.8 2.7 5.1 1.9 3 103 7.1 3.0 5.9 2.1 3 104 6.3 2.9 5.6 1.8 3 105 6.5 3.0 5.8 2.2 3 106 7.6 3.0 6.6 2.1 3 107 4.9 2.5 4.5 1.7 3 108 7.3 2.9 6.3 1.8 3 109 6.7 2.5 5.8 1.8 3 110 7.2 3.6 6.1 2.5 3 111 6.5 3.2 5.1 2.0 3 112 6.4 2.7 5.3 1.9 3 113 6.8 3.0 5.5 2.1 3 114 5.7 2.5 5.0 2.0 3 115 5.8 2.8 5.1 2.4 3 116 6.4 3.2 5.3 2.3 3 117 6.5 3.0 5.5 1.8 3 118 7.7 3.8 6.7 2.2 3 119 7.7 2.6 6.9 2.3 3 120 6.0 2.2 5.0 1.5 3 121 6.9 3.2 5.7 2.3 3 122 5.6 2.8 4.9 2.0 3 123 7.7 2.8 6.7 2.0 3 124 6.3 2.7 4.9 1.8 3 125 6.7 3.3 5.7 2.1 3 126 7.2 3.2 6.0 1.8 3 127 6.2 2.8 4.8 1.8 3 128 6.1 3.0 4.9 1.8 3 129 6.4 2.8 5.6 2.1 3 130 7.2 3.0 5.8 1.6 3 131 7.4 2.8 6.1 1.9 3 132 7.9 3.8 6.4 2.0 3 133 6.4 2.8 5.6 2.2 3 134 6.3 2.8 5.1 1.5 3 135 6.1 2.6 5.6 1.4 3 136 7.7 3.0 6.1 2.3 3 137 6.3 3.4 5.6 2.4 3 138 6.4 3.1 5.5 1.8 3 139 6.0 3.0 4.8 1.8 3 140 6.9 3.1 5.4 2.1 3 141 6.7 3.1 5.6 2.4 3 142 6.9 3.1 5.1 2.3 3 143 5.8 2.7 5.1 1.9 3 144 6.8 3.2 5.9 2.3 3 145 6.7 3.3 5.7 2.5 3 146 6.7 3.0 5.2 2.3 3 147 6.3 2.5 5.0 1.9 3 148 6.5 3.0 5.2 2.0 3 149 6.2 3.4 5.4 2.3 3 150 5.9 3.0 5.1 1.8 3 ; ! 
Various data sets; !CaCircle cat = TYPEA TYPEB; !CaCircle wgt = 1 1; !CaCircle var = X1 X2 X1SQ X2SQ type ; !CaCircle vardep = type; !CaCircle !CaCircle varexp = X1 X2 X1SQ X2SQ; !Include squared terms!CaCircle varpltx = X1; !CaCircle varplty = X2; !CaCircle catplt = TYPEA TYPEB; !CaCircle xobs = 22.98 21.75 528.30 473.06 2 28.27 24.90 799.05 620.01 1 21.85 22.88 477.42 523.71 2 24.90 28.34 620.01 803.19 1 20.30 23.30 412.09 542.89 2 20.30 29.60 412.09 876.16 1 18.75 22.88 351.56 523.71 2 15.70 28.34 246.49 803.19 1 17.62 21.75 310.30 473.06 2 12.33 24.90 152.09 620.01 1 17.20 20.20 295.84 408.04 2 11.10 20.20 123.21 408.04 1 17.62 18.65 310.30 347.82 2 12.33 15.50 152.09 240.25 1 18.75 17.52 351.56 306.79 2 15.70 12.06 246.49 145.43 1 20.30 17.10 412.09 292.41 2 20.30 10.80 412.09 116.64 1 21.85 17.52 477.42 306.79 2 24.90 12.06 620.01 145.43 1 22.98 18.65 528.30 347.82 2 28.27 15.50 799.05 240.25 1 23.40 20.20 547.56 408.04 2 29.50 20.20 870.25 408.04 1 ; ! Case: Unequal class sizes; !CaUneq01 cat = TYPEA TYPEB; !CaUneq01 !CaUneq01 wgt = 1 0.2; ! Wgt to apply to according to class!CaUneq01 var = value type ; !CaUneq01 vardep = type; !CaUneq01 varexp = value; !CaUneq01 varpltx = value; ! and which categories (up to 3) to plot; !CaUneq01 catplt = TYPEA TYPEB; !CaUneq01 xobs = 1 1 2 2 3 2 7 2 8 2 9 2; ! Case 2 : Unequal class sizes, not separable; !CaUneq02 cat = TYPEA TYPEB; !CaUneq02 !CaUneq02 wgt = 0.2 1; !CaUneq02 var = value type; !CaUneq02 vardep = type; !CaUneq02 varexp = value; ! The two variables to plot; !CaUneq02 varpltx = value; ! and which categories (up to 3) to plot; !CaUneq02 catplt = TYPEA TYPEB; ! The matrix of observations; !CaUneq02 xobs = 1 1 2 1 3 2 7 1 8 1 9 1; ! Another inseparable case; !CaInsep cat = TYPEA TYPEB; !CaInsep wgt = 1 1; !CaInsep var = value type; !CaInsep vardep = type; !CaInsep varexp = value; ! The two variables to plot; !CaInsep varpltx = value; !CaInsep varplty = value; ! 
and which categories (up to 3) to plot;
!CaInsep catplt = TYPEA TYPEB;
! The matrix of observations;
!CaInsep xobs = 1 2 2 2 2 1 3 1 ;
! A 2D inseparable case;
!Ca2D cat = RED GREEN;
!Ca2D wgt = 1.1 0.917 ;
!Ca2D var = X1 X2 type;
!Ca2D vardep = type;
!Ca2D varexp = X1 X2;
! The two variables to plot;
!Ca2D varpltx = X1;
!Ca2D varplty = X2;
!Ca2D catplt = RED GREEN;
!Ca2D xobs = 1 2 2 1.1 4 2 2 1 2 2 3 2 2 5 2 2 5.2 1 3 1 1 3 4 1 3.5 3 1 4 0 2 5 2 1 ;
! Illustrate separability and parsimony;
!CaSePar cat = RED GREEN;
!CaSePar wgt = 1 1;
!CaSePar var = X1 X2 type;
!CaSePar vardep = type;
!CaSePar varexp = X1 X2;
!CaSePar varpltx = X1;
!CaSePar varplty = X2;
!CaSePar catplt = RED GREEN;
!CaSePar xobs = 1 8 2 2 6 2 3 9 2 4 7 2 6 3 1 7 1 1 8 4 1 9 2 1 ;
! Genuine and counterfeit banknotes (100 Swiss Francs), various measurements.
  Banknotes BN1 to BN100 are genuine (Good=1), all others are counterfeit (Good=2).
  Dataset courtesy of H. Riedwyl, Bern, Switzerland;
! This is the active data set. Each statement below ends with its own
  terminator so that consecutive statements do not run together;
! The code in the last data column indexes cat by position, so GOOD is listed
  first to match code 1 (genuine) and FAKE second to match code 2 (counterfeit);
!CaSwiss; cat = GOOD FAKE;
!CaSwiss; wgt = 1 1;
!CaSwiss; var = Length Left Right Bottom Top Diagonal Good ;
!CaSwiss; vardep = Good;
!CaSwiss; varexp = Length Left Right Bottom Top Diagonal;
!CaSwiss; varpltx = bottom;
!CaSwiss; varplty = diagonal;
!CaSwiss; catplt = GOOD FAKE;
!CaSwiss; OBS, xobs =
 BN1 214.8 131.0 131.1 9.0 9.7 141.0 1
 BN2 214.6 129.7 129.7 8.1 9.5 141.7 1
 BN3 214.8 129.7 129.7 8.7 9.6 142.2 1
 BN4 214.8 129.7 129.6 7.5 10.4 142.0 1
 BN5 215.0 129.6 129.7 10.4 7.7 141.8 1
 BN6 215.7 130.8 130.5 9.0 10.1 141.4 1
 BN7 215.5 129.5 129.7 7.9 9.6 141.6 1
 BN8 214.5 129.6 129.2 7.2 10.7 141.7 1
 BN9 214.9 129.4 129.7 8.2 11.0 141.9 1
 BN10 215.2 130.4 130.3 9.2 10.0 140.7 1
 BN11 215.3 130.4 130.3 7.9 11.7 141.8 1
 BN12 215.1 129.5 129.6 7.7 10.5 142.2 1
 BN13 215.2 130.8 129.6 7.9 10.8 141.4 1
 BN14 214.7 129.7 129.7 7.7 10.9 141.7 1
 BN15 215.1 129.9 129.7 7.7 10.8 141.8 1
 BN16 214.5 129.8 129.8 9.3 8.5 141.6 1
 BN17 214.6 129.9 130.1 8.2 9.8 141.7 1
 BN18 215.0 129.9 129.7 9.0 9.0 141.9 1
 BN19 215.2 129.6 129.6 7.4 11.5
141.5 1 BN20 214.7 130.2 129.9 8.6 10.0 141.9 1 BN21 215.0 129.9 129.3 8.4 10.0 141.4 1 BN22 215.6 130.5 130.0 8.1 10.3 141.6 1 BN23 215.3 130.6 130.0 8.4 10.8 141.5 1 BN24 215.7 130.2 130.0 8.7 10.0 141.6 1 BN25 215.1 129.7 129.9 7.4 10.8 141.1 1 BN26 215.3 130.4 130.4 8.0 11.0 142.3 1 BN27 215.5 130.2 130.1 8.9 9.8 142.4 1 BN28 215.1 130.3 130.3 9.8 9.5 141.9 1 BN29 215.1 130.0 130.0 7.4 10.5 141.8 1 BN30 214.8 129.7 129.3 8.3 9.0 142.0 1 BN31 215.2 130.1 129.8 7.9 10.7 141.8 1 BN32 214.8 129.7 129.7 8.6 9.1 142.3 1 BN33 215.0 130.0 129.6 7.7 10.5 140.7 1 BN34 215.6 130.4 130.1 8.4 10.3 141.0 1 BN35 215.9 130.4 130.0 8.9 10.6 141.4 1 BN36 214.6 130.2 130.2 9.4 9.7 141.8 1 BN37 215.5 130.3 130.0 8.4 9.7 141.8 1 BN38 215.3 129.9 129.4 7.9 10.0 142.0 1 BN39 215.3 130.3 130.1 8.5 9.3 142.1 1 BN40 213.9 130.3 129.0 8.1 9.7 141.3 1 BN41 214.4 129.8 129.2 8.9 9.4 142.3 1 BN42 214.8 130.1 129.6 8.8 9.9 140.9 1 BN43 214.9 129.6 129.4 9.3 9.0 141.7 1 BN44 214.9 130.4 129.7 9.0 9.8 140.9 1 BN45 214.8 129.4 129.1 8.2 10.2 141.0 1 BN46 214.3 129.5 129.4 8.3 10.2 141.8 1 BN47 214.8 129.9 129.7 8.3 10.2 141.5 1 BN48 214.8 129.9 129.7 7.3 10.9 142.0 1 BN49 214.6 129.7 129.8 7.9 10.3 141.1 1 BN50 214.5 129.0 129.6 7.8 9.8 142.0 1 BN51 214.6 129.8 129.4 7.2 10.0 141.3 1 BN52 215.3 130.6 130.0 9.5 9.7 141.1 1 BN53 214.5 130.1 130.0 7.8 10.9 140.9 1 BN54 215.4 130.2 130.2 7.6 10.9 141.6 1 BN55 214.5 129.4 129.5 7.9 10.0 141.4 1 BN56 215.2 129.7 129.4 9.2 9.4 142.0 1 BN57 215.7 130.0 129.4 9.2 10.4 141.2 1 BN58 215.0 129.6 129.4 8.8 9.0 141.1 1 BN59 215.1 130.1 129.9 7.9 11.0 141.3 1 BN60 215.1 130.0 129.8 8.2 10.3 141.4 1 BN61 215.1 129.6 129.3 8.3 9.9 141.6 1 BN62 215.3 129.7 129.4 7.5 10.5 141.5 1 BN63 215.4 129.8 129.4 8.0 10.6 141.5 1 BN64 214.5 130.0 129.5 8.0 10.8 141.4 1 BN65 215.0 130.0 129.8 8.6 10.6 141.5 1 BN66 215.2 130.6 130.0 8.8 10.6 140.8 1 BN67 214.6 129.5 129.2 7.7 10.3 141.3 1 BN68 214.8 129.7 129.3 9.1 9.5 141.5 1 BN69 215.1 129.6 129.8 8.6 9.8 141.8 1 BN70 214.9 
130.2 130.2 8.0 11.2 139.6 1 BN71 213.8 129.8 129.5 8.4 11.1 140.9 1 BN72 215.2 129.9 129.5 8.2 10.3 141.4 1 BN73 215.0 129.6 130.2 8.7 10.0 141.2 1 BN74 214.4 129.9 129.6 7.5 10.5 141.8 1 BN75 215.2 129.9 129.7 7.2 10.6 142.1 1 BN76 214.1 129.6 129.3 7.6 10.7 141.7 1 BN77 214.9 129.9 130.1 8.8 10.0 141.2 1 BN78 214.6 129.8 129.4 7.4 10.6 141.0 1 BN79 215.2 130.5 129.8 7.9 10.9 140.9 1 BN80 214.6 129.9 129.4 7.9 10.0 141.8 1 BN81 215.1 129.7 129.7 8.6 10.3 140.6 1 BN82 214.9 129.8 129.6 7.5 10.3 141.0 1 BN83 215.2 129.7 129.1 9.0 9.7 141.9 1 BN84 215.2 130.1 129.9 7.9 10.8 141.3 1 BN85 215.4 130.7 130.2 9.0 11.1 141.2 1 BN86 215.1 129.9 129.6 8.9 10.2 141.5 1 BN87 215.2 129.9 129.7 8.7 9.5 141.6 1 BN88 215.0 129.6 129.2 8.4 10.2 142.1 1 BN89 214.9 130.3 129.9 7.4 11.2 141.5 1 BN90 215.0 129.9 129.7 8.0 10.5 142.0 1 BN91 214.7 129.7 129.3 8.6 9.6 141.6 1 BN92 215.4 130.0 129.9 8.5 9.7 141.4 1 BN93 214.9 129.4 129.5 8.2 9.9 141.5 1 BN94 214.5 129.5 129.3 7.4 10.7 141.5 1 BN95 214.7 129.6 129.5 8.3 10.0 142.0 1 BN96 215.6 129.9 129.9 9.0 9.5 141.7 1 BN97 215.0 130.4 130.3 9.1 10.2 141.1 1 BN98 214.4 129.7 129.5 8.0 10.3 141.2 1 BN99 215.1 130.0 129.8 9.1 10.2 141.5 1 BN100 214.7 130.0 129.4 7.8 10.0 141.2 1 BN101 214.4 130.1 130.3 9.7 11.7 139.8 2 BN102 214.9 130.5 130.2 11.0 11.5 139.5 2 BN103 214.9 130.3 130.1 8.7 11.7 140.2 2 BN104 215.0 130.4 130.6 9.9 10.9 140.3 2 BN105 214.7 130.2 130.3 11.8 10.9 139.7 2 BN106 215.0 130.2 130.2 10.6 10.7 139.9 2 BN107 215.3 130.3 130.1 9.3 12.1 140.2 2 BN108 214.8 130.1 130.4 9.8 11.5 139.9 2 BN109 215.0 130.2 129.9 10.0 11.9 139.4 2 BN110 215.2 130.6 130.8 10.4 11.2 140.3 2 BN111 215.2 130.4 130.3 8.0 11.5 139.2 2 BN112 215.1 130.5 130.3 10.6 11.5 140.1 2 BN113 215.4 130.7 131.1 9.7 11.8 140.6 2 BN114 214.9 130.4 129.9 11.4 11.0 139.9 2 BN115 215.1 130.3 130.0 10.6 10.8 139.7 2 BN116 215.5 130.4 130.0 8.2 11.2 139.2 2 BN117 214.7 130.6 130.1 11.8 10.5 139.8 2 BN118 214.7 130.4 130.1 12.1 10.4 139.9 2 BN119 214.8 130.5 130.2 
11.0 11.0 140.0 2 BN120 214.4 130.2 129.9 10.1 12.0 139.2 2 BN121 214.8 130.3 130.4 10.1 12.1 139.6 2 BN122 215.1 130.6 130.3 12.3 10.2 139.6 2 BN123 215.3 130.8 131.1 11.6 10.6 140.2 2 BN124 215.1 130.7 130.4 10.5 11.2 139.7 2 BN125 214.7 130.5 130.5 9.9 10.3 140.1 2 BN126 214.9 130.0 130.3 10.2 11.4 139.6 2 BN127 215.0 130.4 130.4 9.4 11.6 140.2 2 BN128 215.5 130.7 130.3 10.2 11.8 140.0 2 BN129 215.1 130.2 130.2 10.1 11.3 140.3 2 BN130 214.5 130.2 130.6 9.8 12.1 139.9 2 BN131 214.3 130.2 130.0 10.7 10.5 139.8 2 BN132 214.5 130.2 129.8 12.3 11.2 139.2 2 BN133 214.9 130.5 130.2 10.6 11.5 139.9 2 BN134 214.6 130.2 130.4 10.5 11.8 139.7 2 BN135 214.2 130.0 130.2 11.0 11.2 139.5 2 BN136 214.8 130.1 130.1 11.9 11.1 139.5 2 BN137 214.6 129.8 130.2 10.7 11.1 139.4 2 BN138 214.9 130.7 130.3 9.3 11.2 138.3 2 BN139 214.6 130.4 130.4 11.3 10.8 139.8 2 BN140 214.5 130.5 130.2 11.8 10.2 139.6 2 BN141 214.8 130.2 130.3 10.0 11.9 139.3 2 BN142 214.7 130.0 129.4 10.2 11.0 139.2 2 BN143 214.6 130.2 130.4 11.2 10.7 139.9 2 BN144 215.0 130.5 130.4 10.6 11.1 139.9 2 BN145 214.5 129.8 129.8 11.4 10.0 139.3 2 BN146 214.9 130.6 130.4 11.9 10.5 139.8 2 BN147 215.0 130.5 130.4 11.4 10.7 139.9 2 BN148 215.3 130.6 130.3 9.3 11.3 138.1 2 BN149 214.7 130.2 130.1 10.7 11.0 139.4 2 BN150 214.9 129.9 130.0 9.9 12.3 139.4 2 BN151 214.9 130.3 129.9 11.9 10.6 139.8 2 BN152 214.6 129.9 129.7 11.9 10.1 139.0 2 BN153 214.6 129.7 129.3 10.4 11.0 139.3 2 BN154 214.5 130.1 130.1 12.1 10.3 139.4 2 BN155 214.5 130.3 130.0 11.0 11.5 139.5 2 BN156 215.1 130.0 130.3 11.6 10.5 139.7 2 BN157 214.2 129.7 129.6 10.3 11.4 139.5 2 BN158 214.4 130.1 130.0 11.3 10.7 139.2 2 BN159 214.8 130.4 130.6 12.5 10.0 139.3 2 BN160 214.6 130.6 130.1 8.1 12.1 137.9 2 BN161 215.6 130.1 129.7 7.4 12.2 138.4 2 BN162 214.9 130.5 130.1 9.9 10.2 138.1 2 BN163 214.6 130.1 130.0 11.5 10.6 139.5 2 BN164 214.7 130.1 130.2 11.6 10.9 139.1 2 BN165 214.3 130.3 130.0 11.4 10.5 139.8 2 BN166 215.1 130.3 130.6 10.3 12.0 139.7 2 BN167 216.3 
130.7 130.4 10.0 10.1 138.8 2
 BN168 215.6 130.4 130.1 9.6 11.2 138.6 2
 BN169 214.8 129.9 129.8 9.6 12.0 139.6 2
 BN170 214.9 130.0 129.9 11.4 10.9 139.7 2
 BN171 213.9 130.7 130.5 8.7 11.5 137.8 2
 BN172 214.2 130.6 130.4 12.0 10.2 139.6 2
 BN173 214.8 130.5 130.3 11.8 10.5 139.4 2
 BN174 214.8 129.6 130.0 10.4 11.6 139.2 2
 BN175 214.8 130.1 130.0 11.4 10.5 139.6 2
 BN176 214.9 130.4 130.2 11.9 10.7 139.0 2
 BN177 214.3 130.1 130.1 11.6 10.5 139.7 2
 BN178 214.5 130.4 130.0 9.9 12.0 139.6 2
 BN179 214.8 130.5 130.3 10.2 12.1 139.1 2
 BN180 214.5 130.2 130.4 8.2 11.8 137.8 2
 BN181 215.0 130.4 130.1 11.4 10.7 139.1 2
 BN182 214.8 130.6 130.6 8.0 11.4 138.7 2
 BN183 215.0 130.5 130.1 11.0 11.4 139.3 2
 BN184 214.6 130.5 130.4 10.1 11.4 139.3 2
 BN185 214.7 130.2 130.1 10.7 11.1 139.5 2
 BN186 214.7 130.4 130.0 11.5 10.7 139.4 2
 BN187 214.5 130.4 130.0 8.0 12.2 138.5 2
 BN188 214.8 130.0 129.7 11.4 10.6 139.2 2
 BN189 214.8 129.9 130.2 9.6 11.9 139.4 2
 BN190 214.6 130.3 130.2 12.7 9.1 139.2 2
 BN191 215.1 130.2 129.8 10.2 12.0 139.4 2
 BN192 215.4 130.5 130.6 8.8 11.0 138.6 2
 BN193 214.7 130.3 130.2 10.8 11.1 139.2 2
 BN194 215.0 130.5 130.3 9.6 11.0 138.5 2
 BN195 214.9 130.3 130.5 11.6 10.6 139.8 2
 BN196 215.0 130.4 130.3 9.9 12.1 139.6 2
 BN197 215.1 130.3 129.9 10.3 11.5 139.7 2
 BN198 214.8 130.3 130.4 10.6 11.1 140.0 2
 BN199 214.7 130.7 130.8 11.2 11.2 139.4 2
 BN200 214.3 129.9 129.9 10.2 11.5 139.6 2
 ;
ENDDATA

SUBMODEL classify:
! Linear program that fits one linear scoring function per category.
  Inputs:
    idpv           = index within var of the dependent variable (set in CALC),
    xobs( r, v)    = observed value of observation r for variable v, v #ne# idpv,
    xobs( r, idpv) = index of the category to which observation r belongs.
  Decision variables:
    beta( k, v)    = coefficient of variable v in the scoring function of category k,
                     beta( k, idpv) serves as the constant term,
    score( r, k)   = score of observation r under the scoring function of category k,
    badness( r, k, kalt) = scoring error of observation r, whose category is k,
                     measured against the other category kalt;

! Coefficients may take either sign;
 @FOR( cxv( k, v):
    @FREE( beta( k, v));
 );

! Score of observation r under category k: constant plus weighted explanatory values;
 @FOR( oxc( r, k):
    score( r, k) = @SUM( varexp( v): beta( k, v) * xobs( r, v)) + beta( k, idpv);
    @FREE( score( r, k));
 );

! For an observation r of category k we would like
  score( r, k) >= score( r, kalt) + 1 for every other category kalt,
  i.e. a clear gap of at least 1. Any shortfall is recorded in badness;
 @FOR( cat( k):
    @FOR( cat( kalt) | kalt #ne# k:
       @FOR( obs( r) | xobs( r, idpv) #eq# k:
          ! Shortfall relative to the desired gap of 1;
          badness( r, k, kalt) >= 1 + score( r, kalt) - score( r, k);
          ! Second piece with slope 2, it is the binding one once the other
            category scores higher than the home category. Per the original
            author this breaks ties of the type ( 3 - 2 + 1) vs ( 2 - 3 + 1)
            in favor of the tie ( 2.5 - 2.5 + 1), all of total badness 2;
          badness( r, k, kalt) >= 1 + 2*( score( r, kalt) - score( r, k));
       );
    );
 );

! Minimize the category-weighted sum of scoring errors;
 badtot = @SUM( oxcxc( r, k, kalt) | k #ne# kalt: wgt( k) * badness( r, k, kalt));
 min = badtot;
ENDSUBMODEL
CALC:
! Driver: solve the classify submodel, print a classification report,
  then draw a scatter plot of up to three categories;
 @SET( 'TERSEO', 1);   ! Output level (0:verb, 1:terse, 2:only errors, 3:none);

! Index of the dependent variable within var;
 @FOR( vardep( v): idpv = v);
! Number of observations;
 nobs = @SIZE( obs);

! The coefficients of one category can be set to 0 arbitrarily, category 1 is used;
 @FOR( var( v):
    beta( 1, v) = 0.0;
 );

 @SOLVE( classify);

! Short summary report;
 @WRITE( nobs,' = number of points/items/observations.', @NEWLINE(1));

! Classification table. prbcls first counts how often an observation of
  category kfrom is given category kto, then each row is scaled to proportions;
 @FOR( cxc( kfrom, kto): prbcls( kfrom, kto) = 0);
 @FOR( obs( r):
    ! Predicted category = first category attaining the highest score;
    catwin = 0;
    scwin = 0;
    @FOR( cat( k):
       @IFC( catwin #eq# 0 #or# score( r, k) #gt# scwin:
          catwin = k;
          scwin = score( r, k);
       );
    );
    cathome = xobs( r, idpv);
    prbcls( cathome, catwin) = prbcls( cathome, catwin) + 1;
 );

 @WRITE( @NEWLINE(1));
 @WRITE(' Prob( Category i Classified as j)', @NEWLINE(1));
 @WRITE(' ');
 @FOR( cat( kto): @WRITE( @FORMAT( cat( kto),'11s'),' '));
 @WRITE( @NEWLINE(1));
 @FOR( cat( kfrom):
    ! A divisor of at least 1 avoids dividing by 0 for a category without observations;
    rowsum = @SMAX( 1, @SUM( cat( kto): prbcls( kfrom, kto)));
    @FOR( cat( kto): prbcls( kfrom, kto) = prbcls( kfrom, kto)/ rowsum);
    @WRITE( @FORMAT( cat( kfrom),'11s'),': ');
    @FOR( cat( kto): @WRITE( @FORMAT( prbcls( kfrom, kto), '5.4f'),' '));
    @WRITE( @NEWLINE(1));
 );
 @WRITE( @NEWLINE(1));

! Tally observations by how the home score compares with the best other score:
    home > other               counts as strictly and correctly classified,
    other - 1 <= home <= other counts as borderline,
    home < other - 1           counts as misclassified;
 correctpt = 0;
 borderline = 0;
 @FOR( obs( r):
    cathome = xobs( r, idpv);
    scorehome = score( r, cathome);
    scoreother = @MAX( cat( k) | k #ne# cathome: score( r, k));
    @IFC( scorehome #gt# scoreother:
       correctpt = correctpt + 1;
    @ELSE
       @IFC( scorehome #ge# scoreother - 1:
          borderline = borderline + 1;
          @WRITE( r, ' is borderline ', scorehome, ' ', scoreother, @NEWLINE(1));
       @ELSE
          @WRITE( r, ' is misclassified ', scorehome, ' ', scoreother, @NEWLINE(1));
       );
    );
 );
 @WRITE( correctpt,' = number strictly and correctly classified.', @NEWLINE(1));
 @WRITE( borderline,' = number borderline/ambiguous.', @NEWLINE(1));
 @WRITE( nobs - correctpt - borderline,' = number misclassified.', @NEWLINE(1));

! Table of coefficients: the constant, held in beta( k, idpv), followed by
  one column for every variable other than the dependent one;
 @WRITE('The scoring beta:', @NEWLINE(1));
 @WRITE(' Category Constant');
 @FOR( var( v) | v #ne# idpv:
    @WRITE( ' ', @FORMAT( var( v),"%11.11s"));
 );
 @WRITE( @NEWLINE(1));
 @FOR( cat( k):
    @WRITE( @FORMAT( cat( k),"%12.12s"), ' ');
    @WRITE( @FORMAT( beta( k, idpv), '11.6f'));
    @FOR( var( v) | v #ne# idpv:
       @WRITE(' ', @FORMAT( beta( k, v), '11.6f'));
    );
    @WRITE( @NEWLINE(1));
 );

! Score of every observation under every category;
 @WRITE(@NEWLINE(1),' Scores for each observation:', @NEWLINE(1));
 @WRITE(' Obs Cat ');
 @FOR( cat( k):
    @WRITE(@FORMAT( cat( k),'11s'),' ');
 );
 @WRITE(@NEWLINE(1));
 @FOR( obs( r):
    @WRITE( @FORMAT( obs( r),'5s'),' ', @FORMAT( xobs( r, idpv),'3.0f'));
    @FOR( cat( k):
       @WRITE( ' ',@FORMAT( score( r, k),'6.2f'));
    );
    @WRITE( @NEWLINE(1));
 );

! Prepare a two-dimensional plot on the dimensions d1 and d2;
 @FOR( varpltx( v): d1 = v);   ! Variable on the horizontal scale;
 @FOR( varplty( v): d2 = v);   ! Variable on the vertical scale;

! Categories to display. The first member of catplt goes to cat1, the second
  to cat2, and any later member overwrites cat3;
 cat1 = 0;
 cat2 = 0;
 cat3 = 0;
 @FOR( catplt( k):
    @IFC( cat1 #le# 0:
       cat1 = k;
    @ELSE
       @IFC( cat2 #le# 0:
          cat2 = k;
       @ELSE
          cat3 = k;
 )));

! Observations of category cat1, with plot coordinates in X1, Y1;
 @FOR( obs( r) | xobs( r, idpv) #EQ# cat1:
    @INSERT( obs1, r);
    x1( r) = xobs( r, d1);
    y1( r) = xobs( r, d2);
 );
! Observations of category cat2, with plot coordinates in X2, Y2;
 @FOR( obs( r) | xobs( r, idpv) #EQ# cat2:
    @INSERT( obs2, r);
    x2( r) = xobs( r, d1);
    y2( r) = xobs( r, d2);
 );
! Observations of category cat3, with plot coordinates in X3, Y3;
 @FOR( obs( r) | xobs( r, idpv) #EQ# cat3:
    @INSERT( obs3, r);
    x3( r) = xobs( r, d1);
    y3( r) = xobs( r, d2);
 );

! Scatter plot. One call per possible number of plotted categories, no chart
  is drawn when catplt has any other size;
 @IFC( @SIZE( catplt) #eq# 1:
    @CHARTSCATTER( 'Two-dimensional plot',        ! Chart title;
       @FORMAT(var( d1),"7s")+' MEASURE',         ! Legend for X axis;
       @FORMAT(var( d2),"7s")+' MEASURE',         ! Legend for Y axis;
       @FORMAT(cat( cat1),"7s"), x1, y1);         ! Point set 1;
 );
 @IFC( @SIZE( catplt) #eq# 2:
    @CHARTSCATTER( 'Two-dimensional plot',        ! Chart title;
       @FORMAT(var( d1),"7s")+' MEASURE',         ! Legend for X axis;
       @FORMAT(var( d2),"7s")+' MEASURE',         ! Legend for Y axis;
       @FORMAT(cat( cat1),"7s"), x1, y1,          ! Point set 1;
       @FORMAT(cat( cat2),"7s"), x2, y2);         ! Point set 2;
 );
 @IFC( @SIZE( catplt) #eq# 3:
    @CHARTSCATTER( 'Two-dimensional plot',        ! Chart title;
       @FORMAT(var( d1),"7s")+' MEASURE',         ! Legend for X axis;
       @FORMAT(var( d2),"7s")+' MEASURE',         ! Legend for Y axis;
       @FORMAT(cat( cat1),"7s"), x1, y1,          ! Point set 1;
       @FORMAT(cat( cat2),"7s"), x2, y2,          ! Point set 2;
       @FORMAT(cat( cat3),"7s"), x3, y3);         ! Point set 3;
 );
ENDCALC