Lindo Systems

! Discrimant analysis with multiple categories/groups,
 via linear optimization.
  The problem:
    We want to predict a categorical outcome/variable, e.g.,
  DryWell vs. OilOnly vs. GasOnly vs. OilGas,
 based on one or more explanatory variables,
 given a set of observations.
  Basic approach: For each item(or observation) i, 
 compute a score for each category or group  c, c = 1, 2, ....
 We classify item i to that category c that has
 the highest score over all categories.
 If xobs(i,j) is the value of item i for feature/variable j,
 score(i,c) = beta(c,0) + beta(c,1)*xobs(i,1) + beta(c,2)*xobs(i,2)+...;
! Ref: Gochet, W.,  A. Stam, V. Srinivasan, and S. Chen (1997),
"Multigroup Discriminant Analysis Using Linear Programming".
Operations Research, vol. 45, no. 2, pp. 213-225;
!Keywords: Categorical regression, Classification, Clustering,
  Discriminant analysis, Machine learning, Random forest, Supervised learning;
sets:
 obs;  ! The rows/items;
 var;  ! The columns or "variables" in the statistical sense;
 cat: wgt;  ! The categories for the dependent var;
 vardep( var); ! The dependent variable;
 varexp( var); ! The set of explanatory variables;
 varpltx( var); ! Variable on X axis in plot/chart/graph;
 varplty( var); ! Variable on Y axis in plot/chart/graph;
 catplt( cat);  ! The categories to plot;

 oxv(obs,var): xobs; ! The matrix of observations;
 cxc( cat, cat): prbcls; ! Probability classification;
 cxv( cat, var): beta; ! "Regression" coefficients;
 oxc( obs, cat): score;
 oxcxc( obs, cat, cat) : badness;
! Subsets useful for doing a 2-dimensional plot;
 OBS1( OBS): X1, Y1;
 OBS2( OBS): X2, Y2;
 OBS3( OBS): X3, Y3;
endsets
data:
! Various data sets;
! Data set 1;
!wg1   cat = TYPE1 TYPE2 TYPE3;  ! The possible categories for the dependent variable;
!wg1   wgt =  1     1     1;   ! Weight to apply to observations of this category;
!  You may want to give more weight to observations of a specific category
  if that category is not well represented in the data, or
  if it is costly to misclassify that category;
!wg1   var = v1  v2 catgry; ! Names of variables, including dependent;
!wg1   vardep = catgry; ! the dependent variable;
!wg1   varexp = v1 v2 ; ! the explanatory variables;
!wg1   varpltx = v1;
!wg1   varplty = v2;
!  and which categories (up to 3) to plot;
!wg1   catplt =  TYPE1 TYPE2 TYPE3;

! The names of the observations;
!wg1  obs = ob11 ob12 ob13 ob21 ob22 ob23 ob31 ob32 ob33;    
! The  matrix of observations. Note, the dependent variable
should take one of the values: 1, 2, 3, ...;
!wg1  xobs = 
     1    2    1
     2    3    1
     2    4    1
     4    2    2
     2.5  3    2
     2.5  4    2
     1    1    3
     2    2.5  3
     4    1    3
 ;

! Data set 2;
!wg2   cat = CAT1 CAT2 CAT3 CAT4 CAT5 ;  ! The possible categories for the dependent variable;
!wg2   wgt =  1    1    1    1    1;  ! Weight to apply to observations of each category;
!wg2   var = v1  v2 catgry; ! Names of variables, including dependent;
!wg2   vardep = catgry;  ! the dependent variable;
!wg2   varexp = v1  v2 ;  ! the explanatory variable;
!wg2   varpltx = v1;   ! The 2 vars to plot;
!wg2   varplty = v2;
!  and which categories (up to 3) to plot;
!wg2    catplt =  CAT2 CAT3 CAT4 ;

! The names of the observations;
!wg2   obs = ob1..ob20;    
! The  matrix of observations;
!wg2       xobs = 
      0   2.6   1
      1   3     1
      1.4 1.8   1
       1.4 2.6   1
       0   1     2 !5;
 !wg2  0   2     2
       1   2     2
       2   2     2
       1   1     3
       2.4 2     3
       3   0.6   3
       3   1.4   3
       1   0     4  !13;
 !wg2  2   0     4
       2   0.6   4
       3   0     4
       0   0     5
       1   0     5  !18;
 !wg2   0   1     5  !19;
 !wg2   0.4 0.4   5
 ;

! Data set 3;
!ds3  cat = CAT1 CAT2 CAT3;  ! The possible categories for the dependent variable;
!ds3   wgt =  1    1   1;   ! Weight to apply to observations of this category;
!ds3  var = v1  v2 catgry; ! Names of variables, including dependent;
!ds3  vardep = catgry ; ! the dependent variable;
!ds3  varexp = v1 v2; ! Explanatory variables;
!ds3  varpltx = v1; ! Variables to plot;
!ds3  varplty = v2;
!  and which categories (up to 3) to plot;
!ds3  catplt =  CAT1 CAT2 CAT3;


! The names of the observations;
!ds3  obs = ob11 ob12 ob13 ob14 ob21 ob22 ob23 ob31 ob32;    
! The  matrix of observations;
!ds3     xobs = 
     2    3    1
     5    6    1
     3    8    1
     2    1    1
     4    6    2
     1    3    2
     4    4.5  2
     2    4    3
     4    5    3
 ;

! Data set 4;
!ds4  cat = CAT1 CAT2;  ! The possible categories for the dependent variable;
!ds4   wgt = 1     1;   ! Weight to apply to observations of this category;
!ds4  var = v1  catgry; ! Names of variables, including dependent;
!ds4  vardep = catgry;  !  the dependent variable;
!ds4  varexp = v1;      ! The explanatory variables;
! The names of the observations;
!ds4 obs = ob11 ob22;    
! The  matrix of observations;
!ds4     xobs = 
     0    1
     0    2
 ;

! Data set from Shuichi;
!ss   cat = GOOD  BAD;  ! The possible categories for the dependent variable;
!ss   wgt =  1     1 ;   ! Weight to apply to observations of each category;
! Names of variables, including dependent;
!ss   var = TEST1     TEST2       TEST3       TEST4       TEST5     TYPE; 
!ss   vardep = TYPE;   ! Dependent variable;
!ss   varexp = TEST1     TEST2       TEST3       TEST4       TEST5; ! Explanatory variables;
!ss   varpltx = TEST1;
!ss   varplty = TEST2;
!  and which categories (up to 3) to plot;
!ss   catplt =  GOOD BAD;
! The names of the observations;
!ss  obs = 1..66;     
! The  matrix of observations;
!ss      xobs = 
! 1ss  346.7000122 247.1999969 220.5       364.1000061 311.7000122 2
! 2ss  334         313.2999878 306.5       330.8999939 311.1000061 2
! 3ss  248.3999939 189.1999969 206.8000031 334.7000122 312.5       1
! 4ss  309         291.8999939 281.2000122 346.2000122 311.1000061 1
! 5ss  328.8999939 306.2000122 259.3999939 336.3999939 310.8999939 2
! 6ss  252.8000031 248.8000031 253.8000031 321         311.7000122 1
! 7ss  313         289.7000122 292.6000061 318         311         2
! 8ss  304.8999939 115.5       284.2000122 316.5       310.5       1
! 9ss  327.8999939 330.7999878 305.7000122 332.6000061 311         1
!10ss  315.3999939 203.8999939 287.1000061 333.7999878 311.5       1
!11ss  333         270.6000061 274.2999878 379.1000061 311.2000122 2
!12ss  242.3999939 145.8999939 292.2999878 318.7000122 311.2999878 2
!13ss  124.9000015 1.1         244.1999969 345.7000122 310.7999878 2
!14ss  323.5       317.2000122 287.3999939 406.1000061 312         1
!15ss  304.2999878 191.6999969 275.7999878 331.7000122 311.5       2
!16ss  382.3999939 124.0999985  30         322.5       316.7000122 1
!17ss  327         275.3999939 290.6000061 345.5       313.3999939 1
!18ss  278.7999878 282.1000061 316.2999878 317         311.2999878 2
!19ss  324.1000061 261.7999878 316.7999878 326.6000061 311.6000061 2
!20ss  249.3999939 260.7999878 292.7999878 317.2000122 310.2999878 1
!21ss  336.2000122 290.7999878 273.2999878 400.3999939 310.7999878 1
!22ss  317         291.8999939 303.5       326.5       310.8999939 2
!23ss  256.8999939 212         282         336.6000061 311.7000122 1
!24ss  292.7999878 181         295.7999878 577.9000244 311.2999878 2
!25ss  342.7000122 306         294.2000122 487.3999939 312.1000061 2
!26ss  336.7000122 301.2999878 273.7000122 342.5       312.7999878 2
!27ss  302.2999878 250.8000031 297.2000122 331.2999878 312.1000061 2
!28ss  328         296.8999939 292.3999939 324.6000061 310.8999939 1
!29ss  312         272         311.6000061 317.7000122 311.2000122 1
!30ss  274.7000122 252.1000061 310.7000122 323.7000122 310.7999878 2
!31ss  315.1000061 301.2000122 300.8999939 410.8999939 310.8999939 2
!32ss  310         245.3000031 306         310.7000122 310.1000061 2
!33ss  335.2000122 298.6000061 314.7999878 317         310.8999939 1
!34ss  345.2000122 353         326.3999939 409.1000061 311.2999878 1
!35ss  348.7999878 357         326         436.5       311.8999939 2
!36ss  324         306.7000122 314         401.7000122 312.7000122 2
!37ss  365.1000061 345         330.7999878 382.2999878 311.8999939 1
!38ss  369.2999878 356.7000122 322.6000061 1034.1      310.8999939 1
!39ss  343.6000061 330.7999878 322.5       462.7999878 312.3999939 1
!40ss  362.7999878 343         333.6000061 785.9000244 311.5       1
!41ss  355.6000061 336.1000061 320.3999939 597.9000244 312.1000061 2
!42ss  357.3999939 378.6000061 323.7999878 891.2999878 311.6000061 1
!43ss  350         347.2999878 343.3999939 538.7999878 313.5       1
!44ss  379         369         333.1000061 716         315.5  1
!45ss  344.2000122 359.6000061 333.7999878 436.6000061 311.8999939 1
!46ss  357         322.5       317         363.3999939 311.7999878 2
!47ss  325.3999939 347.2999878 344.1000061 880.0999756 311.5       1
!48ss  366.8999939 345.2999878 314.2000122 550.2999878 310.8999939 2
!49ss  353.7999878 359.5       335.1000061 425         312.6000061 2
!50ss  330.7000122 328.1000061 323.5       373.1000061 314         1
!51ss  343.7999878 341.3999939 325.7000122 454.7999878 311.8999939 2
!52ss  345.2999878 331.5       295.6000061 400         311         1
!53ss  334.3999939 318.5       315.7999878 459.1000061 311.5       2
!54ss  358.8999939 350.6000061 315.7999878 392         311.7999878 2
!55ss  359.8999939 344.6000061 336.3999939 620         311.7999878 2
!56ss  364.7999878 329.8999939 336.7000122 549.9000244 312.2999878 1
!57ss  349         327.3999939 322.6000061 370.5       311.2999878 1
!58ss  363         364.7000122 324.6000061 1081.7      311.7000122 1
!59ss  330.1000061 363.5       330.6000061 617.5       311.1000061 1
!60ss  363.7000122 345.8999939 336.3999939 599.5       312         2
!61ss  356.1000061 349.3999939 340.5       1010        311.8999939 1
!62ss  358.2999878 363.1000061 317.1000061 474.3999939 311.8999939 2
!63ss  356.7000122 349.7999878 323.7999878 539.0999756 311.2000122 2
!64ss  370.2999878 369.5       317         536.5999756 312         1
!65ss  327.8999939 326.2999878 330.3999939 415.6000061 311         2
!66ss  334.7000122 331.7000122 302.2000122 428.6000061 311.6000061 2;

! Data set 6, Separable diagonally;
!SepDiag  cat =  GOOD BAD;  ! The possible categories for the dependent variable;
!SepDiag  wgt =    1   1;   ! Weight to apply to observations of this category;
!SepDiag  var = xcoord  ycoord catgry; ! Names of variables, including dependent;
!SepDiag  vardep = catgry; ! the dependent variable;
!SepDiag  varexp = xcoord  ycoord; ! Explanatory variables;
!SepDiag  varpltx = xcoord; ! Variables to plot;
!SepDiag  varplty = ycoord;
!  and which categories (up to 3) to plot;
!SepDiag  catplt =  GOOD BAD;


! The names of the observations;
!SepDiag   obs = ob01 ob02 ob03 ob04 ob05 ob06;    
! The  matrix of observations;
!SepDiag  xobs = 
     1    3    1
     2    2    1
     3    1    1
     5    7    2
     6    6    2
     7    5    2
 ;
!Fisher's Iris data ;
!Iris    cat = SETOSA  VERSICOLOR VIRGINICA;
!Iris    wgt =  1  1  1 ;  ! Weight to apply to observations of this category;
!Iris    var = DUMMY SEPLEN SEPWID PETLEN PETWID SPECIES; ! Names of variables, including dependent;
!Iris    vardep = SPECIES;   ! Dependent variable;
!Iris    varexp = SEPLEN SEPWID PETLEN PETWID ; ! Explanatory variables;
  ! Note, the DUMMY variable is disregarded;
!  The two variables to plot;
!Iris    varpltx = SEPLEN;
!Iris    varplty = PETLEN;
!  and which categories (up to 3) to plot;
!Iris  !  catplt =  SETOSA;
!Iris  !  catplt =  SETOSA VERSICOLOR;
!Iris    catplt =  SETOSA VERSICOLOR, VIRGINICA;
! The names of the observations;
!Iris   obs = 1..150;    
! The  matrix of observations; 
!DatasetOrder SepalLength SepalWidth PetalLength PetalWidth Species;
!Iris   xobs =
1   5.1   3.5   1.4   0.2    1
2   4.9   3.0   1.4   0.2    1
3   4.7   3.2   1.3   0.2    1
4   4.6   3.1   1.5   0.2    1
5   5.0   3.6   1.4   0.3    1
6   5.4   3.9   1.7   0.4    1
7   4.6   3.4   1.4   0.3    1
8   5.0   3.4   1.5   0.2    1
9   4.4   2.9   1.4   0.2    1
10   4.9   3.1   1.5   0.1    1
11   5.4   3.7   1.5   0.2    1
12   4.8   3.4   1.6   0.2    1
13   4.8   3.0   1.4   0.1    1
14   4.3   3.0   1.1   0.1    1
15   5.8   4.0   1.2   0.2    1
16   5.7   4.4   1.5   0.4    1
17   5.4   3.9   1.3   0.4    1
18   5.1   3.5   1.4   0.3    1
19   5.7   3.8   1.7   0.3    1
20   5.1   3.8   1.5   0.3    1
21   5.4   3.4   1.7   0.2    1
22   5.1   3.7   1.5   0.4    1
23   4.6   3.6   1.0   0.2    1
24   5.1   3.3   1.7   0.5    1
25   4.8   3.4   1.9   0.2    1
26   5.0   3.0   1.6   0.2    1
27   5.0   3.4   1.6   0.4    1
28   5.2   3.5   1.5   0.2    1
29   5.2   3.4   1.4   0.2    1
30   4.7   3.2   1.6   0.2    1
31   4.8   3.1   1.6   0.2    1
32   5.4   3.4   1.5   0.4    1
33   5.2   4.1   1.5   0.1    1
34   5.5   4.2   1.4   0.2    1
35   4.9   3.1   1.5   0.2    1
36   5.0   3.2   1.2   0.2    1
37   5.5   3.5   1.3   0.2    1
38   4.9   3.6   1.4   0.1    1
39   4.4   3.0   1.3   0.2    1
40   5.1   3.4   1.5   0.2    1
41   5.0   3.5   1.3   0.3    1
42   4.5   2.3   1.3   0.3    1
43   4.4   3.2   1.3   0.2    1
44   5.0   3.5   1.6   0.6    1
45   5.1   3.8   1.9   0.4    1
46   4.8   3.0   1.4   0.3    1
47   5.1   3.8   1.6   0.2    1
48   4.6   3.2   1.4   0.2    1
49   5.3   3.7   1.5   0.2    1
50   5.0   3.3   1.4   0.2    1
51   7.0   3.2   4.7   1.4    2
52   6.4   3.2   4.5   1.5    2
53   6.9   3.1   4.9   1.5    2
54   5.5   2.3   4.0   1.3    2
55   6.5   2.8   4.6   1.5    2
56   5.7   2.8   4.5   1.3    2
57   6.3   3.3   4.7   1.6    2
58   4.9   2.4   3.3   1.0    2
59   6.6   2.9   4.6   1.3    2
60   5.2   2.7   3.9   1.4    2
61   5.0   2.0   3.5   1.0    2
62   5.9   3.0   4.2   1.5    2
63   6.0   2.2   4.0   1.0    2
64   6.1   2.9   4.7   1.4    2
65   5.6   2.9   3.6   1.3    2
66   6.7   3.1   4.4   1.4    2
67   5.6   3.0   4.5   1.5    2
68   5.8   2.7   4.1   1.0    2
69   6.2   2.2   4.5   1.5    2
70   5.6   2.5   3.9   1.1    2
71   5.9   3.2   4.8   1.8    2
72   6.1   2.8   4.0   1.3    2
73   6.3   2.5   4.9   1.5    2
74   6.1   2.8   4.7   1.2    2
75   6.4   2.9   4.3   1.3    2
76   6.6   3.0   4.4   1.4    2
77   6.8   2.8   4.8   1.4    2
78   6.7   3.0   5.0   1.7    2
79   6.0   2.9   4.5   1.5    2
80   5.7   2.6   3.5   1.0    2
81   5.5   2.4   3.8   1.1    2
82   5.5   2.4   3.7   1.0    2
83   5.8   2.7   3.9   1.2    2
84   6.0   2.7   5.1   1.6    2
85   5.4   3.0   4.5   1.5    2
86   6.0   3.4   4.5   1.6    2
87   6.7   3.1   4.7   1.5    2
88   6.3   2.3   4.4   1.3    2
89   5.6   3.0   4.1   1.3    2
90   5.5   2.5   4.0   1.3    2
91   5.5   2.6   4.4   1.2    2
92   6.1   3.0   4.6   1.4    2
93   5.8   2.6   4.0   1.2    2
94   5.0   2.3   3.3   1.0    2
95   5.6   2.7   4.2   1.3    2
96   5.7   3.0   4.2   1.2    2
97   5.7   2.9   4.2   1.3    2
98   6.2   2.9   4.3   1.3    2
99   5.1   2.5   3.0   1.1    2
100   5.7   2.8   4.1   1.3    2
101   6.3   3.3   6.0   2.5    3
102   5.8   2.7   5.1   1.9    3
103   7.1   3.0   5.9   2.1    3
104   6.3   2.9   5.6   1.8    3
105   6.5   3.0   5.8   2.2    3
106   7.6   3.0   6.6   2.1    3
107   4.9   2.5   4.5   1.7    3
108   7.3   2.9   6.3   1.8    3
109   6.7   2.5   5.8   1.8    3
110   7.2   3.6   6.1   2.5    3
111   6.5   3.2   5.1   2.0    3
112   6.4   2.7   5.3   1.9    3
113   6.8   3.0   5.5   2.1    3
114   5.7   2.5   5.0   2.0    3
115   5.8   2.8   5.1   2.4    3
116   6.4   3.2   5.3   2.3    3
117   6.5   3.0   5.5   1.8    3
118   7.7   3.8   6.7   2.2    3
119   7.7   2.6   6.9   2.3    3
120   6.0   2.2   5.0   1.5    3
121   6.9   3.2   5.7   2.3    3
122   5.6   2.8   4.9   2.0    3
123   7.7   2.8   6.7   2.0    3
124   6.3   2.7   4.9   1.8    3
125   6.7   3.3   5.7   2.1    3
126   7.2   3.2   6.0   1.8    3
127   6.2   2.8   4.8   1.8    3
128   6.1   3.0   4.9   1.8    3
129   6.4   2.8   5.6   2.1    3
130   7.2   3.0   5.8   1.6    3
131   7.4   2.8   6.1   1.9    3
132   7.9   3.8   6.4   2.0    3
133   6.4   2.8   5.6   2.2    3
134   6.3   2.8   5.1   1.5    3
135   6.1   2.6   5.6   1.4    3
136   7.7   3.0   6.1   2.3    3
137   6.3   3.4   5.6   2.4    3
138   6.4   3.1   5.5   1.8    3
139   6.0   3.0   4.8   1.8    3
140   6.9   3.1   5.4   2.1    3
141   6.7   3.1   5.6   2.4    3
142   6.9   3.1   5.1   2.3    3
143   5.8   2.7   5.1   1.9    3
144   6.8   3.2   5.9   2.3    3
145   6.7   3.3   5.7   2.5    3
146   6.7   3.0   5.2   2.3    3
147   6.3   2.5   5.0   1.9    3
148   6.5   3.0   5.2   2.0    3
149   6.2   3.4   5.4   2.3    3
150   5.9   3.0   5.1   1.8    3
;

! Various data sets;
!CaCircle  cat = TYPEA TYPEB;
!CaCircle  wgt =  1     1; ! Wgt to apply to according to class;
!CaCircle  var = X1  X2  X1SQ X2SQ type ;
!CaCircle  vardep = type;
!CaCircle  ! varexp = X1 X2;
!CaCircle  varexp = X1 X2 X1SQ X2SQ;  !Include squared terms;
!CaCircle  varpltx = X1;
!CaCircle  varplty = X2;
!CaCircle  catplt = TYPEA TYPEB;
!CaCircle  xobs = 
 22.98  21.75 528.30 473.06  2 
 28.27  24.90 799.05 620.01  1 
 21.85  22.88 477.42 523.71  2 
 24.90  28.34 620.01 803.19  1 
 20.30  23.30 412.09 542.89  2 
 20.30  29.60 412.09 876.16  1 
 18.75  22.88 351.56 523.71  2 
 15.70  28.34 246.49 803.19  1 
 17.62  21.75 310.30 473.06  2 
 12.33  24.90 152.09 620.01  1 
 17.20  20.20 295.84 408.04  2 
 11.10  20.20 123.21 408.04  1 
 17.62  18.65 310.30 347.82  2 
 12.33  15.50 152.09 240.25  1 
 18.75  17.52 351.56 306.79  2 
 15.70  12.06 246.49 145.43  1 
 20.30  17.10 412.09 292.41  2 
 20.30  10.80 412.09 116.64  1 
 21.85  17.52 477.42 306.79  2 
 24.90  12.06 620.01 145.43  1 
 22.98  18.65 528.30 347.82  2 
 28.27  15.50 799.05 240.25  1 
 23.40  20.20 547.56 408.04  2 
 29.50  20.20 870.25 408.04  1 ;

! Case: Unequal class sizes;
!CaUneq01    cat = TYPEA TYPEB;
!CaUneq01  !  wgt =   1     1; ! Wgt to apply to according to class;
!CaUneq01    wgt =  1    0.2; ! Wgt to apply to according to class;
!CaUneq01    var = value type ;
!CaUneq01    vardep = type;
!CaUneq01    varexp = value;
!CaUneq01    varpltx = value;
!  and which categories (up to 3) to plot;
!CaUneq01    catplt =  TYPEA TYPEB;
!CaUneq01    xobs =
    1  1  
    2  2  
    3  2  
    7  2  
    8  2  
    9  2;

! Case 2 : Unequal class sizes, not separable;
!CaUneq02    cat = TYPEA TYPEB;
!CaUneq02  !  wgt =    1    1;
!CaUneq02    wgt =    0.2   1;
!CaUneq02    var = value type;
!CaUneq02    vardep = type;
!CaUneq02    varexp = value;
!  The two variables to plot;
!CaUneq02    varpltx = value;
 !  and which categories (up to 3) to plot;
!CaUneq02    catplt =  TYPEA TYPEB;
! The  matrix of observations; 
!CaUneq02    xobs =
    1  1  
    2  1  
    3  2  
    7  1  
    8  1  
    9  1;

! Another inseparable case;
!CaInsep     cat = TYPEA TYPEB;
!CaInsep     wgt =   1    1;
!CaInsep     var = value type;
!CaInsep     vardep = type;
!CaInsep     varexp = value;
!  The two variables to plot;
!CaInsep     varpltx = value;
!CaInsep     varplty = value;
 !  and which categories (up to 3) to plot;
!CaInsep     catplt =  TYPEA TYPEB;
! The  matrix of observations; 
!CaInsep     xobs =
    1  2  
    2  2  
    2  1  
    3  1  ;

! A 2D inseparable case;
!Ca2D    cat = RED  GREEN;
!Ca2D    wgt = 1.1 0.917 ;
!Ca2D    var = X1  X2 type;
!Ca2D    vardep = type;
!Ca2D    varexp = X1 X2;
!  The two variables to plot;
!Ca2D    varpltx = X1; 
!Ca2D    varplty = X2;
!Ca2D    catplt = RED  GREEN;
!Ca2D    xobs =
    1   2   2    
    1.1 4   2    
    2   1   2    
    2   3   2    
    2   5   2    
    2   5.2 1 
    3   1   1 
    3   4   1  
    3.5 3   1 
    4   0   2    
    5   2   1 ;


! Illustrate separability and parsimony;
!CaSePar   cat = RED  GREEN;
!CaSePar    wgt =  1   1;
!CaSePar   var = X1  X2  type;
!CaSePar   vardep = type;
!CaSePar   varexp = X1 X2;
!CaSePar   varpltx = X1;
!CaSePar   varplty = X2;
!CaSePar   catplt = RED  GREEN;
!CaSePar   xobs =
   1 8 2 
   2 6 2 
   3 9 2 
   4 7 2 
   6 3 1 
   7 1 1 
   8 4 1 
   9 2 1 ;

!  Genuine and counterfeit banknotes (100 Swiss Francs),
various measurements.
Banknotes BN1 to BN100 are genuine (Good=1),
all others are counterfeit (Good=2).
Dataset courtesy of H. Riedwyl, Bern, Switzerland;
!CaSwiss;  cat = FAKE  GOOD ;
!CaSwiss;  wgt =  1     1;
!CaSwiss;  var = 
      Length  Left    Right   Bottom  Top     Diagonal  Good ;
!CaSwiss;  vardep = Good;      ! Dependent variable ;
!CaSwiss;  varexp = Length  Left     Right   Bottom  Top  Diagonal;  ! Explanatory vars;
!CaSwiss;  varpltx = bottom;    ! Variable on X axis in plot/chart/graph;
!CaSwiss;  varplty = diagonal;  ! Variable on Y axis in plot/chart/graph;
!CaSwiss;  catplt = FAKE  GOOD; ! Categories to plot;
!CaSwiss;  OBS, xobs =
BN1     214.8   131.0   131.1   9.0     9.7     141.0   1
BN2     214.6   129.7   129.7   8.1     9.5     141.7   1
BN3     214.8   129.7   129.7   8.7     9.6     142.2   1
BN4     214.8   129.7   129.6   7.5     10.4    142.0   1
BN5     215.0   129.6   129.7   10.4    7.7     141.8   1
BN6     215.7   130.8   130.5   9.0     10.1    141.4   1
BN7     215.5   129.5   129.7   7.9     9.6     141.6   1
BN8     214.5   129.6   129.2   7.2     10.7    141.7   1
BN9     214.9   129.4   129.7   8.2     11.0    141.9   1
BN10    215.2   130.4   130.3   9.2     10.0    140.7   1
BN11    215.3   130.4   130.3   7.9     11.7    141.8   1
BN12    215.1   129.5   129.6   7.7     10.5    142.2   1
BN13    215.2   130.8   129.6   7.9     10.8    141.4   1
BN14    214.7   129.7   129.7   7.7     10.9    141.7   1
BN15    215.1   129.9   129.7   7.7     10.8    141.8   1
BN16    214.5   129.8   129.8   9.3     8.5     141.6   1
BN17    214.6   129.9   130.1   8.2     9.8     141.7   1
BN18    215.0   129.9   129.7   9.0     9.0     141.9   1
BN19    215.2   129.6   129.6   7.4     11.5    141.5   1
BN20    214.7   130.2   129.9   8.6     10.0    141.9   1
BN21    215.0   129.9   129.3   8.4     10.0    141.4   1
BN22    215.6   130.5   130.0   8.1     10.3    141.6   1
BN23    215.3   130.6   130.0   8.4     10.8    141.5   1
BN24    215.7   130.2   130.0   8.7     10.0    141.6   1
BN25    215.1   129.7   129.9   7.4     10.8    141.1   1
BN26    215.3   130.4   130.4   8.0     11.0    142.3   1
BN27    215.5   130.2   130.1   8.9     9.8     142.4   1
BN28    215.1   130.3   130.3   9.8     9.5     141.9   1
BN29    215.1   130.0   130.0   7.4     10.5    141.8   1
BN30    214.8   129.7   129.3   8.3     9.0     142.0   1
BN31    215.2   130.1   129.8   7.9     10.7    141.8   1
BN32    214.8   129.7   129.7   8.6     9.1     142.3   1
BN33    215.0   130.0   129.6   7.7     10.5    140.7   1
BN34    215.6   130.4   130.1   8.4     10.3    141.0   1
BN35    215.9   130.4   130.0   8.9     10.6    141.4   1
BN36    214.6   130.2   130.2   9.4     9.7     141.8   1
BN37    215.5   130.3   130.0   8.4     9.7     141.8   1
BN38    215.3   129.9   129.4   7.9     10.0    142.0   1
BN39    215.3   130.3   130.1   8.5     9.3     142.1   1
BN40    213.9   130.3   129.0   8.1     9.7     141.3   1
BN41    214.4   129.8   129.2   8.9     9.4     142.3   1
BN42    214.8   130.1   129.6   8.8     9.9     140.9   1
BN43    214.9   129.6   129.4   9.3     9.0     141.7   1
BN44    214.9   130.4   129.7   9.0     9.8     140.9   1
BN45    214.8   129.4   129.1   8.2     10.2    141.0   1
BN46    214.3   129.5   129.4   8.3     10.2    141.8   1
BN47    214.8   129.9   129.7   8.3     10.2    141.5   1
BN48    214.8   129.9   129.7   7.3     10.9    142.0   1
BN49    214.6   129.7   129.8   7.9     10.3    141.1   1
BN50    214.5   129.0   129.6   7.8     9.8     142.0   1
BN51    214.6   129.8   129.4   7.2     10.0    141.3   1
BN52    215.3   130.6   130.0   9.5     9.7     141.1   1
BN53    214.5   130.1   130.0   7.8     10.9    140.9   1
BN54    215.4   130.2   130.2   7.6     10.9    141.6   1
BN55    214.5   129.4   129.5   7.9     10.0    141.4   1
BN56    215.2   129.7   129.4   9.2     9.4     142.0   1
BN57    215.7   130.0   129.4   9.2     10.4    141.2   1
BN58    215.0   129.6   129.4   8.8     9.0     141.1   1
BN59    215.1   130.1   129.9   7.9     11.0    141.3   1
BN60    215.1   130.0   129.8   8.2     10.3    141.4   1
BN61    215.1   129.6   129.3   8.3     9.9     141.6   1
BN62    215.3   129.7   129.4   7.5     10.5    141.5   1
BN63    215.4   129.8   129.4   8.0     10.6    141.5   1
BN64    214.5   130.0   129.5   8.0     10.8    141.4   1
BN65    215.0   130.0   129.8   8.6     10.6    141.5   1
BN66    215.2   130.6   130.0   8.8     10.6    140.8   1
BN67    214.6   129.5   129.2   7.7     10.3    141.3   1
BN68    214.8   129.7   129.3   9.1     9.5     141.5   1
BN69    215.1   129.6   129.8   8.6     9.8     141.8   1
BN70    214.9   130.2   130.2   8.0     11.2    139.6   1
BN71    213.8   129.8   129.5   8.4     11.1    140.9   1
BN72    215.2   129.9   129.5   8.2     10.3    141.4   1
BN73    215.0   129.6   130.2   8.7     10.0    141.2   1
BN74    214.4   129.9   129.6   7.5     10.5    141.8   1
BN75    215.2   129.9   129.7   7.2     10.6    142.1   1
BN76    214.1   129.6   129.3   7.6     10.7    141.7   1
BN77    214.9   129.9   130.1   8.8     10.0    141.2   1
BN78    214.6   129.8   129.4   7.4     10.6    141.0   1
BN79    215.2   130.5   129.8   7.9     10.9    140.9   1
BN80    214.6   129.9   129.4   7.9     10.0    141.8   1
BN81    215.1   129.7   129.7   8.6     10.3    140.6   1
BN82    214.9   129.8   129.6   7.5     10.3    141.0   1
BN83    215.2   129.7   129.1   9.0     9.7     141.9   1
BN84    215.2   130.1   129.9   7.9     10.8    141.3   1
BN85    215.4   130.7   130.2   9.0     11.1    141.2   1 
BN86    215.1   129.9   129.6   8.9     10.2    141.5   1
BN87    215.2   129.9   129.7   8.7     9.5     141.6   1
BN88    215.0   129.6   129.2   8.4     10.2    142.1   1
BN89    214.9   130.3   129.9   7.4     11.2    141.5   1
BN90    215.0   129.9   129.7   8.0     10.5    142.0   1
BN91    214.7   129.7   129.3   8.6     9.6     141.6   1
BN92    215.4   130.0   129.9   8.5     9.7     141.4   1
BN93    214.9   129.4   129.5   8.2     9.9     141.5   1
BN94    214.5   129.5   129.3   7.4     10.7    141.5   1
BN95    214.7   129.6   129.5   8.3     10.0    142.0   1
BN96    215.6   129.9   129.9   9.0     9.5     141.7   1
BN97    215.0   130.4   130.3   9.1     10.2    141.1   1
BN98    214.4   129.7   129.5   8.0     10.3    141.2   1
BN99    215.1   130.0   129.8   9.1     10.2    141.5   1
BN100   214.7   130.0   129.4   7.8     10.0    141.2   1
BN101   214.4   130.1   130.3   9.7     11.7    139.8   2
BN102   214.9   130.5   130.2   11.0    11.5    139.5   2 
BN103   214.9   130.3   130.1   8.7     11.7    140.2   2
BN104   215.0   130.4   130.6   9.9     10.9    140.3   2
BN105   214.7   130.2   130.3   11.8    10.9    139.7   2
BN106   215.0   130.2   130.2   10.6    10.7    139.9   2 
BN107   215.3   130.3   130.1   9.3     12.1    140.2   2
BN108   214.8   130.1   130.4   9.8     11.5    139.9   2
BN109   215.0   130.2   129.9   10.0    11.9    139.4   2
BN110   215.2   130.6   130.8   10.4    11.2    140.3   2
BN111   215.2   130.4   130.3   8.0     11.5    139.2   2
BN112   215.1   130.5   130.3   10.6    11.5    140.1   2
BN113   215.4   130.7   131.1   9.7     11.8    140.6   2
BN114   214.9   130.4   129.9   11.4    11.0    139.9   2
BN115   215.1   130.3   130.0   10.6    10.8    139.7   2
BN116   215.5   130.4   130.0   8.2     11.2    139.2   2
BN117   214.7   130.6   130.1   11.8    10.5    139.8   2
BN118   214.7   130.4   130.1   12.1    10.4    139.9   2
BN119   214.8   130.5   130.2   11.0    11.0    140.0   2 
BN120   214.4   130.2   129.9   10.1    12.0    139.2   2
BN121   214.8   130.3   130.4   10.1    12.1    139.6   2
BN122   215.1   130.6   130.3   12.3    10.2    139.6   2
BN123   215.3   130.8   131.1   11.6    10.6    140.2   2
BN124   215.1   130.7   130.4   10.5    11.2    139.7   2
BN125   214.7   130.5   130.5   9.9     10.3    140.1   2
BN126   214.9   130.0   130.3   10.2    11.4    139.6   2
BN127   215.0   130.4   130.4   9.4     11.6    140.2   2
BN128   215.5   130.7   130.3   10.2    11.8    140.0   2
BN129   215.1   130.2   130.2   10.1    11.3    140.3   2 
BN130   214.5   130.2   130.6   9.8     12.1    139.9   2
BN131   214.3   130.2   130.0   10.7    10.5    139.8   2
BN132   214.5   130.2   129.8   12.3    11.2    139.2   2
BN133   214.9   130.5   130.2   10.6    11.5    139.9   2 
BN134   214.6   130.2   130.4   10.5    11.8    139.7   2
BN135   214.2   130.0   130.2   11.0    11.2    139.5   2  
BN136   214.8   130.1   130.1   11.9    11.1    139.5   2
BN137   214.6   129.8   130.2   10.7    11.1    139.4   2 
BN138   214.9   130.7   130.3   9.3     11.2    138.3   2
BN139   214.6   130.4   130.4   11.3    10.8    139.8   2
BN140   214.5   130.5   130.2   11.8    10.2    139.6   2  
BN141   214.8   130.2   130.3   10.0    11.9    139.3   2
BN142   214.7   130.0   129.4   10.2    11.0    139.2   2
BN143   214.6   130.2   130.4   11.2    10.7    139.9   2
BN144   215.0   130.5   130.4   10.6    11.1    139.9   2
BN145   214.5   129.8   129.8   11.4    10.0    139.3   2
BN146   214.9   130.6   130.4   11.9    10.5    139.8   2
BN147   215.0   130.5   130.4   11.4    10.7    139.9   2
BN148   215.3   130.6   130.3   9.3     11.3    138.1   2
BN149   214.7   130.2   130.1   10.7    11.0    139.4   2
BN150   214.9   129.9   130.0   9.9     12.3    139.4   2
BN151   214.9   130.3   129.9   11.9    10.6    139.8   2
BN152   214.6   129.9   129.7   11.9    10.1    139.0   2
BN153   214.6   129.7   129.3   10.4    11.0    139.3   2
BN154   214.5   130.1   130.1   12.1    10.3    139.4   2
BN155   214.5   130.3   130.0   11.0    11.5    139.5   2
BN156   215.1   130.0   130.3   11.6    10.5    139.7   2
BN157   214.2   129.7   129.6   10.3    11.4    139.5   2
BN158   214.4   130.1   130.0   11.3    10.7    139.2   2
BN159   214.8   130.4   130.6   12.5    10.0    139.3   2
BN160   214.6   130.6   130.1   8.1     12.1    137.9   2
BN161   215.6   130.1   129.7   7.4     12.2    138.4   2
BN162   214.9   130.5   130.1   9.9     10.2    138.1   2
BN163   214.6   130.1   130.0   11.5    10.6    139.5   2
BN164   214.7   130.1   130.2   11.6    10.9    139.1   2 
BN165   214.3   130.3   130.0   11.4    10.5    139.8   2
BN166   215.1   130.3   130.6   10.3    12.0    139.7   2
BN167   216.3   130.7   130.4   10.0    10.1    138.8   2
BN168   215.6   130.4   130.1   9.6     11.2    138.6   2
BN169   214.8   129.9   129.8   9.6     12.0    139.6   2
BN170   214.9   130.0   129.9   11.4    10.9    139.7   2
BN171   213.9   130.7   130.5   8.7     11.5    137.8   2
BN172   214.2   130.6   130.4   12.0    10.2    139.6   2
BN173   214.8   130.5   130.3   11.8    10.5    139.4   2
BN174   214.8   129.6   130.0   10.4    11.6    139.2   2
BN175   214.8   130.1   130.0   11.4    10.5    139.6   2
BN176   214.9   130.4   130.2   11.9    10.7    139.0   2 
BN177   214.3   130.1   130.1   11.6    10.5    139.7   2
BN178   214.5   130.4   130.0   9.9     12.0    139.6   2
BN179   214.8   130.5   130.3   10.2    12.1    139.1   2
BN180   214.5   130.2   130.4   8.2     11.8    137.8   2
BN181   215.0   130.4   130.1   11.4    10.7    139.1   2
BN182   214.8   130.6   130.6   8.0     11.4    138.7   2
BN183   215.0   130.5   130.1   11.0    11.4    139.3   2
BN184   214.6   130.5   130.4   10.1    11.4    139.3   2
BN185   214.7   130.2   130.1   10.7    11.1    139.5   2
BN186   214.7   130.4   130.0   11.5    10.7    139.4   2
BN187   214.5   130.4   130.0   8.0     12.2    138.5   2
BN188   214.8   130.0   129.7   11.4    10.6    139.2   2
BN189   214.8   129.9   130.2   9.6     11.9    139.4   2 
BN190   214.6   130.3   130.2   12.7    9.1     139.2   2 
BN191   215.1   130.2   129.8   10.2    12.0    139.4   2
BN192   215.4   130.5   130.6   8.8     11.0    138.6   2
BN193   214.7   130.3   130.2   10.8    11.1    139.2   2 
BN194   215.0   130.5   130.3   9.6     11.0    138.5   2
BN195   214.9   130.3   130.5   11.6    10.6    139.8   2
BN196   215.0   130.4   130.3   9.9     12.1    139.6   2
BN197   215.1   130.3   129.9   10.3    11.5    139.7   2
BN198   214.8   130.3   130.4   10.6    11.1    140.0   2
BN199   214.7   130.7   130.8   11.2    11.2    139.4   2
BN200   214.3   129.9   129.9   10.2    11.5    139.6   2
;
enddata

submodel classify:
! Parameters:
    idpv = index of the explanatory variable,
    xobs(i,j) = observed value for observation i for
             explanatory variable j, n #ne# idpv,
    xobs(i,idpv) = index of the category to which observation belongs;
! Decision variables:
    score(i,c) = score of observation i, using scoring function of category c
;
   ! The scoring coefficents can be - or +;
   @for( cxv(c,j): @free( beta(c,j)));

  ! Score of observation i according to category c;
  @for( oxc( i, c) : 
       score(i,c) = beta( c, idpv) + @sum( varexp( j): beta( c, j)*xobs( i, j));
       @free( score( i, c));
      );

 !  Compute badness, or scoring error, of observation i in category c relative
     to all other groups c1.  If observation i is in category c,
     then score( i, c) should be >= score( i, c1) + 1, for c1 #ne# c,
     i.e., we would like a clear gap of at least 1 between categories;
  @for( cat( c):
     @for( cat( c1) | c1 #ne# c:
       @for( obs( i) | xobs( i, idpv) #eq# c:
          badness(i, c, c1) >= ( score( i, c1) - score( i, c) + 1);
 ! This breaks ties of the type ( 3 - 2 + 1) vs ( 2 - 3 + 1) 
    in favor of the tie (2.5 - 2.5 + 1). All have total badness of 2;
          badness( i, c, c1) >= 2*( score( i, c1) - score( i, c) + 0.5);
           );
         );
       );

 ! minimize total badness or sum of misclassification scoring errors;
   badtot = @sum( oxcxc( i, c, c1)  | c #ne# c1 : wgt( c) * badness( i, c, c1));
   min = badtot;
endsubmodel

calc:
  @SET( 'TERSEO',1);    ! Output level (0:verb, 1:terse, 2:only errors, 3:none);
! Get the index number of the dependent var;
  @for( vardep( j): idpv = j);! Count observations of each type;


  nobs = @size(obs);
! We can arbitrarily set the beta's for one category to 0;
  @for( var( j):
     beta( 1, j) = 0.0;
       );
  @solve( classify);

!  Write a little summary report;
  @write( nobs,' = number of points/items/observations.',  @newline(1));

! Compute probability of observation of 
    category i being classified as being of type j;
   @for( cxc( k1, k2): prbcls( k1, k2) = 0);
   @for( obs( i):
! Find predicted category of this observation;
     catwin = 0;
     scwin = 0;
     @for( cat( k1):
       @ifc( catwin #eq# 0 #or# score( i, k1) #gt# scwin:
         catwin = k1;
         scwin = score( i, k1);
          );
       );
      cathome = xobs( i, idpv);
      prbcls( cathome, catwin) = prbcls( cathome, catwin) + 1;
       );

   @write( @newline(1));
   @write('      Prob( Category i Classified as j)', @newline(1));
   @write('        ');
   @for( cat( j): @write( @format( cat( j),'11s'),' '));
   @write( @newline(1));
   @for( cat( i):
     rowsum = @sum( cat( j): prbcls( i, j));
     rowsum = @smax( 1, rowsum);
     @for( cat( j): prbcls( i, j) = prbcls( i, j)/ rowsum);
     @write( @format( cat( i),'11s'),': ');
     @for( cat( j):  @write( @format( prbcls( i, j), '5.4f'),'      '));
     @write( @newline(1));
       );
     @write( @newline(1));

! Count number of...; 
  Correctpt = 0;  !correctly classified points;
  borderline = 0; !borderline between correct and incorrect;
  @for( obs(i):
    cathome = xobs( i, idpv);
    scorehome = score( i, cathome);
    scoreother = @max( cat(c) | c #ne# cathome: score(i,c));
    @ifc( scorehome #gt# scoreother: 
        correctpt = correctpt + 1;
      @else
         @ifc( scorehome #ge# scoreother - 1:
           borderline = borderline + 1;
             @write( i, ' is borderline ', scorehome, ' ', scoreother, @newline(1));
           @else
             @write( i, ' is misclassified ', scorehome, ' ', scoreother, @newline(1));
              );
         );
      ); 
 @write( correctpt,' = number strictly and correctly classified.',  @newline(1));
 @write( borderline,' = number borderline/ambiguous.',  @newline(1));
 @write( nobs - correctpt - borderline,' = number misclassified.',  @newline(1));


 @write('The scoring beta:', @newline(1));
 @write('    Category    Constant');
 @for( var( j) | j #ne# idpv:
     @write( ' ', @FORMAT( var(j),"%11.11s"));   ! Always gives a field of 8 characters;
     );
 @write( @newline(1));
 @for( cat( c):
   @write( @FORMAT( cat(c),"%12.12s"), ' '); 
   @write( @format( beta( c, idpv), '11.6f'));
   @for( var( j) | j #ne# idpv:
      @write(' ', @format( beta(c,j), '11.6f'));
       );
   @write( @newline(1));
     );
 @write(@newline(1),' Scores for each observation:', @newline(1));
 @write(' Obs  Cat ');
 @for( cat( k):
   @write(@format( cat( k),'11s'),' ');
     );
 @write(@newline(1));

 @for( obs( i):
    @write( @format( obs( i),'5s'),' ', @format( xobs( i, idpv),'3.0f'));
    @for( cat(k):
       @write( '      ',@format( score( i, k),'6.2f'));
        );
    @write( @newline(1));
     );

! Prepare to do a two-dimensional plot based on dimensions d1 and d2;
!  Create set of type 1 items, with 2 dimensions in X1, Y1;
   @for( varpltx( j): d1 = j); ! Get var on horizontal scale;
   @for( varplty( j): d2 = j); ! Get var on vertical scale;
! Get the categories to be displayed;
   cat1 = 0;
   cat2 = 0;
   cat3 = 0;
   @for( catplt( k):
     @ifc( cat1 #le# 0:
       cat1 = k;
      @else
        @ifc( cat2 #le# 0:
          cat2 = k;
          @else
            cat3 = k;
           )));
! Create subsets of items for each of the categories cat1, cat2, cat3;
   @FOR( OBS( I) | xobs( I, idpv) #EQ# cat1:
    @INSERT( OBS1, I);
    X1( I) = xobs( I, D1);
    Y1( I) = xobs( I, D2);
      );
! Create set of the type cat2 items, with 2 dimensions in X2, Y2;
  @FOR( OBS( I) | xobs( I, idpv) #EQ# cat2:
    @INSERT( OBS2, I);
    X2( I) = xobs( I, D1);
    Y2( I) = xobs( I, D2);
      );
! Create set of the type cat3 items, with 2 dimensions in X2, Y2;
  @FOR( OBS( I) | xobs( I, idpv) #EQ# cat3:
    @INSERT( OBS3, I);
    X3( I) = xobs( I, D1);
    Y3( I) = xobs( I, D2);
      );

! Now do a scatter plot;
  @ifc( @size( catplt) #eq# 1:
  @CHARTSCATTER( 'Two-dimensional plot', !Chart title;
      @FORMAT(var( D1),"7s")+' MEASURE', !Legend for X axis;
      @FORMAT(var( D2),"7s")+' MEASURE', !Legend for Y axis;
      @FORMAT(cat( cat1),"7s"), x1, y1);  !Point set 1;
      );
  @ifc( @size( catplt) #eq# 2:
  @CHARTSCATTER( 'Two-dimensional plot', !Chart title;
      @FORMAT(var( D1),"7s")+' MEASURE', !Legend for X axis;
      @FORMAT(var( D2),"7s")+' MEASURE', !Legend for Y axis;
      @FORMAT(cat( cat1),"7s"), x1, y1,  !Point set 1;
      @FORMAT(cat( cat2),"7s"), x2, y2); !Point set 2;
      );
  @ifc( @size( catplt) #eq# 3:
  @CHARTSCATTER( 'Two-dimensional plot', !Chart title;
      @FORMAT(var( D1),"7s")+' MEASURE', !Legend for X axis;
      @FORMAT(var( D2),"7s")+' MEASURE', !Legend for Y axis;
      @FORMAT(cat( cat1),"7s"), x1, y1,  !Point set 1;
      @FORMAT(cat( cat2),"7s"), x2, y2,  !Point set 1;
      @FORMAT(cat( cat3),"7s"), x3, y3); !Point set 2;
      );

endcalc