! Neural Network training in LINGO          (NeuralNetGen.lng)
    We want to predict the (binary) outcome (-1 or +1)
  for each of a set of observations.
  We use a neural network with 4 layers.  
  Layer 0 is the input layer, consisting of, in this case, 5 nodes,
  i.e., the features observed for each observation.
  Layers 1 and 2 are the hidden layers.
  Layer 3 is the output or prediction layer. It has just one node
  that should have an output of either +1 or -1, depending on the observation.
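     With the sizes chosen in the DATA section below, the network is
   5 input features -> 10 hidden nodes -> 8 hidden nodes -> 1 output node.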
    The key idea is that each node at level v computes an input
  X = a constant plus a weighted sum of the outputs of the nodes at level v-1,
      and then converts this to
  Y = a sigmoid function of X, a bounded output. Hidden-layer outputs lie in
      (-PI/2, +PI/2), and the final output is rescaled to (-1, +1).
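    Concretely, as implemented in the ANN submodel below, for observation i
  and node k at level v,
      Xv( i, k) = BETAvC( k) + sum over nodes j at level v-1 of BETAvX( k, j)*Yv-1( i, j)
      Yv( i, k) = @ATAN( Xv( i, k))
  where the "outputs" at level 0 are just the observed features XDAT( i, j).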

! The key variables are:
  BETAvX( k, j) = weight applied to the output of node j at level v-1
                     in computing the input to node k at level v,
  BETAvC( k)    = constant term in computing the input to node k at level v;
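!  For example, BETA1X( k1, j) weights feature j into hidden node k1,
   BETA2X( k2, k1) weights the output of hidden node k1 into hidden node k2,
   and BETA3X( k2) weights the output of hidden node k2 into the single
   output node. The output node's constant term, BETA3C, is a scalar;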

!Keywords: ANN, Artificial neural network, Forecasting, Machine learning, ML, Neural network;

SETS: 
! Neural network with 2 hidden layers;
 HIDLAY1: BETA1C; 
 HIDLAY2: BETA2C, BETA3X;
 FEATURE: ;
 OBS: X3, Y3;
 OXT( OBS, FEATURE): XDAT;
 OXH1( OBS, HIDLAY1): X1, Y1 ;
 OXH2( OBS, HIDLAY2): X2, Y2 ;
 H1XF( HIDLAY1, FEATURE): BETA1X;
 H2XH1( HIDLAY2, HIDLAY1): BETA2X;
ENDSETS
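! For example, the derived set OXH1( OBS, HIDLAY1) carries, for each
  (observation, layer-1 node) pair, the pre-sigmoid input X1( i, k1) and the
  post-sigmoid output Y1( i, k1). OXH2 plays the same role for layer 2,
  while X3 and Y3, defined on OBS, hold the input and prediction of the
  single output node;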
DATA:
 HIDLAY1 = 1..10;   ! Number of nodes in layer 1;
 HIDLAY2 = 1..8;    ! Number of nodes in layer 2;
! The 66-observation data set;
 OBS = 1..66;
 HOLDOUT = 50;  ! Observation to hold out, to test quality of prediction.
    If the predictor is good, it should predict well not only for the
    in-sample/training observations, but also for the holdout observation;
! Each observation consists of 5 test scores + a dependent variable, TYPE.
  We would like to predict the TYPE, based on the 5 explanatory variables;
 FEATURE = TEST1 TEST2 TEST3 TEST4 TEST5 TYPE;
 DEPVAR = 6;  ! Index of dependent variable;
 XDAT =
! 1; 346.7000122 247.1999969 220.5 364.1000061 311.7000122 -1
! 2; 334 313.2999878 306.5 330.8999939 311.1000061 -1
! 3; 248.3999939 189.1999969 206.8000031 334.7000122 312.5 1
! 4; 309 291.8999939 281.2000122 346.2000122 311.1000061 1
! 5; 328.8999939 306.2000122 259.3999939 336.3999939 310.8999939 -1
! 6; 252.8000031 248.8000031 253.8000031 321 311.7000122 1
! 7; 313 289.7000122 292.6000061 318 311 -1
! 8; 304.8999939 115.5 284.2000122 316.5 310.5 1
! 9; 327.8999939 330.7999878 305.7000122 332.6000061 311 1
!10; 315.3999939 203.8999939 287.1000061 333.7999878 311.5 1
!11; 333 270.6000061 274.2999878 379.1000061 311.2000122 -1
!12; 242.3999939 145.8999939 292.2999878 318.7000122 311.2999878 -1
!13; 124.9000015 1.1 244.1999969 345.7000122 310.7999878 -1
!14; 323.5 317.2000122 287.3999939 406.1000061 312 1
!15; 304.2999878 191.6999969 275.7999878 331.7000122 311.5 -1
!16; 382.3999939 124.0999985 30 322.5 316.7000122 1
!17; 327 275.3999939 290.6000061 345.5 313.3999939 1
!18; 278.7999878 282.1000061 316.2999878 317 311.2999878 -1
!19; 324.1000061 261.7999878 316.7999878 326.6000061 311.6000061 -1
!20; 249.3999939 260.7999878 292.7999878 317.2000122 310.2999878 1
!21; 336.2000122 290.7999878 273.2999878 400.3999939 310.7999878 1
!22; 317 291.8999939 303.5 326.5 310.8999939 -1
!23; 256.8999939 212 282 336.6000061 311.7000122 1
!24; 292.7999878 181 295.7999878 577.9000244 311.2999878 -1
!25; 342.7000122 306 294.2000122 487.3999939 312.1000061 -1
!26; 336.7000122 301.2999878 273.7000122 342.5 312.7999878 -1
!27; 302.2999878 250.8000031 297.2000122 331.2999878 312.1000061 -1
!28; 328 296.8999939 292.3999939 324.6000061 310.8999939 1
!29; 312 272 311.6000061 317.7000122 311.2000122 1
!30; 274.7000122 252.1000061 310.7000122 323.7000122 310.7999878 -1
!31; 315.1000061 301.2000122 300.8999939 410.8999939 310.8999939 -1
!32; 310 245.3000031 306 310.7000122 310.1000061 -1
!33; 335.2000122 298.6000061 314.7999878 317 310.8999939 1
!34; 345.2000122 353 326.3999939 409.1000061 311.2999878 1
!35; 348.7999878 357 326 436.5 311.8999939 -1
!36; 324 306.7000122 314 401.7000122 312.7000122 -1
!37; 365.1000061 345 330.7999878 382.2999878 311.8999939 1
!38; 369.2999878 356.7000122 322.6000061 1034.1 310.8999939 1
!39; 343.6000061 330.7999878 322.5 462.7999878 312.3999939 1
!40; 362.7999878 343 333.6000061 785.9000244 311.5 1
!41; 355.6000061 336.1000061 320.3999939 597.9000244 312.1000061 -1
!42; 357.3999939 378.6000061 323.7999878 891.2999878 311.6000061 1
!43; 350 347.2999878 343.3999939 538.7999878 313.5 1
!44; 379 369 333.1000061 716 315.5 1
!45; 344.2000122 359.6000061 333.7999878 436.6000061 311.8999939 1
!46; 357 322.5 317 363.3999939 311.7999878 -1
!47; 325.3999939 347.2999878 344.1000061 880.0999756 311.5 1
!48; 366.8999939 345.2999878 314.2000122 550.2999878 310.8999939 -1
!49; 353.7999878 359.5 335.1000061 425 312.6000061 -1
!50; 330.7000122 328.1000061 323.5 373.1000061 314 1
!51; 343.7999878 341.3999939 325.7000122 454.7999878 311.8999939 -1
!52; 345.2999878 331.5 295.6000061 400 311 1
!53; 334.3999939 318.5 315.7999878 459.1000061 311.5 -1
!54; 358.8999939 350.6000061 315.7999878 392 311.7999878 -1
!55; 359.8999939 344.6000061 336.3999939 620 311.7999878 -1
!56; 364.7999878 329.8999939 336.7000122 549.9000244 312.2999878 1
!57; 349 327.3999939 322.6000061 370.5 311.2999878 1
!58; 363 364.7000122 324.6000061 1081.7 311.7000122 1
!59; 330.1000061 363.5 330.6000061 617.5 311.1000061 1
!60; 363.7000122 345.8999939 336.3999939 599.5 312 -1
!61; 356.1000061 349.3999939 340.5 1010 311.8999939 1
!62; 358.2999878 363.1000061 317.1000061 474.3999939 311.8999939 -1
!63; 356.7000122 349.7999878 323.7999878 539.0999756 311.2000122 -1
!64; 370.2999878 369.5 317 536.5999756 312 1
!65; 327.8999939 326.2999878 330.3999939 415.6000061 311 -1
!66; 334.7000122 331.7000122 302.2000122 428.6000061 311.6000061 -1;
ENDDATA

SUBMODEL ANN:
! The nice feature of the @ATAN( x) function is that it is a sigmoid.
  As x goes to -infinity, @ATAN( x) goes to -PI/2.
  As x goes to +infinity, @ATAN( x) goes to +PI/2;
! Examples of other sigmoid functions are: @TANH( x), 1/(1 + @EXP( -x)),
  x/((1 + x^2)^0.5), x/(1 + @ABS( x));
! Layer 0 is the input. There is one node in layer 0 for each explanatory variable;
! For each observation i, do the calculations;
 @FOR( OBS( i):
! 1st hidden layer. For observation i, node k1 in layer 1, output Y1( i, k1)
  is a sigmoid function of a weighted sum of the input nodes;
    @FOR( HIDLAY1( k1):
      X1( i, k1) = BETA1C( k1)
         + @SUM( FEATURE( j) | j #NE# DEPVAR: BETA1X( k1, j)* XDAT( i, j));
      Y1( i, k1) = @ATAN( X1( i, k1));  ! Convert to bounded interval;
      @FREE( X1( i, k1));
      @FREE( Y1( i, k1));
      );
! 2nd hidden layer. For observation i, node k2 in layer 2, output Y2( i, k2)
  is a sigmoid function of a weighted sum of the outputs of nodes in layer 1;
    @FOR( HIDLAY2( k2):
      X2( i, k2) = BETA2C( k2)
         + @SUM( HIDLAY1( k1): BETA2X( k2, k1)* Y1( i, k1));
      Y2( i, k2) = @ATAN( X2( i, k2));  ! Convert to bounded interval;
      @FREE( Y2( i, k2));
      @FREE( X2( i, k2));
      );
! The last layer predicts the output. Output Y3( i) is a sigmoid function of
  a weighted sum of the outputs of nodes in layer 2. Y3( i) should be close
  to -1 for group A, +1 for group B. Rescale (-PI/2, +PI/2) to (-1, +1);
    X3( i) = BETA3C + @SUM( HIDLAY2( k2): BETA3X( k2)* Y2( i, k2));
    Y3( i) = @ATAN( X3( i))*2/ @PI();  ! Convert to bounded interval (-1, +1);
    @FREE( X3( i));
    @FREE( Y3( i));
    );
! Minimize the forecast error, excluding the holdout observation. If the
  model is good, the prediction for the holdout observation should be just
  as accurate as the predictions for the observations that were used to
  train the network;
 ErrTot = @SUM( OBS( i) | i #NE# HOLDOUT #AND# XDAT( i, DEPVAR) #GT# 0: 1 - Y3( i))
        + @SUM( OBS( i) | i #NE# HOLDOUT #AND# XDAT( i, DEPVAR) #LT# 0: Y3( i) + 1);
 MIN = ErrTot;
! All the variables are unconstrained in sign;
 @FREE( BETA3C);
 @FOR( HIDLAY1( k):
    @FREE( BETA1C( k));
    @FOR( FEATURE( j): @FREE( BETA1X( k, j)););
    );
 @FOR( HIDLAY2( k):
    @FREE( BETA2C( k));
    @FREE( BETA3X( k));
    @FOR( HIDLAY1( j): @FREE( BETA2X( k, j)););
    );
ENDSUBMODEL
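! For intuition about the rescaled sigmoid used for Y3 above, one could add
  the statement below (a hypothetical check, commented out here) to the CALC
  section. It prints @ATAN( 10)*2/ @PI(), about 0.94, i.e., already nearly
  saturated at +1;
! @WRITE( @FORMAT( @ATAN( 10)*2/ @PI(), '6.3f'), @NEWLINE( 1));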
CALC:
 @SET( 'MULTIS', 15);    ! NLP multi-start attempts (0: LINGO decides, n: attempts);
 @SET( 'NTHRDS', 6);     ! Number of parallel threads to use;
 @SET( 'OROUTE', 1);     ! Buffer size for routing output to window;
 @SET( 'TERSEO', 1);     ! Output level (0: verbose, 1: terse, 2: errors only, 3: none);
 @SET( 'TIMLIM', 99999); ! Time limit in seconds (0: no limit) for entire session;
 @SET( 'DUALCO', 0);     ! Compute dual prices/reduced costs (0: no, 1: prices, 2: + ranges);
 @SET( 'WNLINE', 10000); ! Max command window lines saved (Windows only);
 @SET( 'REDUCE', 1);     ! Reduce model? (0: off, 1: on, 2: solver decides);
 @SET( 'GLOBAL', 0);     ! Use the Global solver? (0: no, 1: yes);
 @SOLVE( ANN);  ! Solve the artificial neural network;
 @WRITE( ' Incorrectly predicted observations are:', @NEWLINE( 1),
         ' Obs Actual Predicted', @NEWLINE( 1));
 @FOR( OBS( i) | @ABS( Y3( i) - XDAT( i, DEPVAR)) #GE# 1:
    @WRITE( @FORMAT( i, '5.0f'), ' ', @FORMAT( XDAT( i, DEPVAR), '3.0f'),
            ' ', @FORMAT( Y3( i), '5.2f'), @NEWLINE( 1));
    );
 @IFC( 0 #LT# HOLDOUT #AND# HOLDOUT #LE# @SIZE( OBS):
    @WRITE( @NEWLINE( 1), ' For the Holdout observation:', @NEWLINE( 1));
    @WRITE( @FORMAT( HOLDOUT, '5.0f'), ' ', @FORMAT( XDAT( HOLDOUT, DEPVAR), '3.0f'),
            ' ', @FORMAT( Y3( HOLDOUT), '5.2f'), @NEWLINE( 1));
    );
! In case we want to look at the model;
! @SMPI( '\temp\ANNGen04.mpi', ANN);
ENDCALC
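! A fuller out-of-sample test would hold out each observation in turn and
  re-solve. A sketch one could add to the CALC section above, under the
  assumption that HOLDOUT may be reassigned between solves (uncomment to try);
! @FOR( OBS( h): HOLDOUT = h;
! @SOLVE( ANN);
! @WRITE( @FORMAT( h, '5.0f'), ' ', @FORMAT( XDAT( h, DEPVAR), '3.0f'),
!    ' ', @FORMAT( Y3( h), '5.2f'), @NEWLINE( 1));
! );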