The design of a BP neural network should pay attention to the following issues:
1. Number of network layers. A standard three-layer network can approximate any continuous function. Although adding layers can improve accuracy and reduce error, it also complicates the network and lengthens training time. If higher accuracy is needed, increasing the number of hidden-layer neurons should be preferred over adding layers.
2. Number of neurons in the hidden layer. Training accuracy can usually be improved with a single hidden layer by increasing its number of neurons. A common rule of thumb is to make the hidden layer about twice the size of the input layer, plus a small margin; for the four-dimensional input used below, that gives roughly the 10 hidden nodes defined as HIDENUM.
3. Selection of initial weights. Initial weights are generally taken as small random numbers in (-1, 1).
4. Learning rate. The learning rate determines how much the weights change in each training cycle. Too high a rate can make the system unstable, while too low a rate lengthens training and slows convergence. Values in the range 0.01 to 0.8 are typical, and a suitable value can be chosen by comparing the errors obtained at different rates. The update rules the learning rate enters into are sketched after this list.
5. Choice of the error target. During training, the target error should be determined by comparative trials, in relation to the number of hidden-layer nodes.
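The weight corrections in the code below follow the standard delta rule for a sigmoid network. Writing $\alpha$ for the learning rate, $t_k$ for the desired output, $y_k$ for the computed output of output node $k$, and $o_h$ for the output of hidden node $h$:

$$\delta_k = (t_k - y_k)\,y_k(1-y_k), \qquad \delta_h = \Big(\sum_k \delta_k w_{kh}\Big)\,o_h(1-o_h)$$

$$w_{kh} \leftarrow w_{kh} + \alpha\,\delta_k\,o_h, \qquad w_{hi} \leftarrow w_{hi} + \alpha\,\delta_h\,x_i, \qquad \theta \leftarrow \theta - \alpha\,\delta$$

where $w_{kh}$ is the weight from hidden node $h$ to output node $k$, $w_{hi}$ the weight from input $x_i$ to hidden node $h$, and $\theta$ a node's threshold. These correspond to the outError, hideError, and update expressions in startTrain() below.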
The calculation follows the standard BP steps: forward propagation through the hidden and output layers, computation of the output error, backpropagation of the error terms to the hidden layer, and correction of the weights and thresholds.
The test data are as follows (each row holds four input values followed by one output value):
114.6 1.1 0.71 85.0 346
132.4 0.97 0.54 73.0 410
103.5 0.96 0.66 67.0 385
179.3 0.88 0.59 89.0 446
92.7 1.15 0.44 154.0 300
115.0 0.74 0.65 252.0 453
163.6 0.85 0.58 220.0 495
139.5 0.70 0.59 217.0 478
76.7 0.95 0.51 162.0 341
42.1 1.08 0.47 110.0 326
77.8 1.19 0.57 91.0 364
100.6 0.82 0.59 83.0 456
55.3 0.96 0.4 69.0 300
152.1 1.04 0.49 77.0 433
81.0 1.08 0.54 96.0 336
29.8 0.83 0.49 120.0 289
248.6 0.79 0.5 147.0 483
64.9 0.59 0.5 147.0 483
95.7 1.02 0.48 160.0 384
89.9 0.96 0.39 105.0 314
121.8 0.83 0.60 140.0 401
78.5 0.89 0.44 94.0 280
90.0 0.95 0.43 89.0 301
The code is as follows:
#include "stdio.h"
#include "Stdlib.h"
#include "math.h"
#include "time.h"
#include "vector"
using namespace Std;
#define NOISEVAR 0.01//noise intensity (prevent over fitting)
#define ERRORVAR 0.001//Error setting
#define ALPHA 0.35//learning efficiency
#define LOOPNUM 10000//maximum number of cycles
#define HIDENUM 10//middle-tier hidden node points
#define DIMNUM 5//Dimension
#define DIMIN 4//Input layer dimension
#define DIMOUT 1//Output layer dimension
#define INF 99999
typedef vector<double> Doublevector;
Doublevector Maxsamp;
Doublevector Minsamp;
Vector<doublevector> Getfileinf (char *file); Get Training samples
vector<doublevector> Normalization (vector<doublevector> sample); Normalization of samples
Double Getrandnum (); Get random numbers
void Starttrain (vector<doublevector> sample); Start training
void Usebp (vector<doublevector> WX, vector<doublevector> WY); Using BP neural network
int main()
{
    char *file = "BP.txt";  // whitespace-separated numbers, five per sample, as in the table above
    vector<DoubleVector> sample;
    sample = getFileInf(file);       // read the samples
    sample = normalization(sample);  // normalize the samples
    startTrain(sample);              // train the network
    return 0;
}
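// Note: startTrain() prints the learned weights when training ends and then
// calls useBP(), which loops forever reading four input values from the
// console and printing the corresponding prediction.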
// Train the network
void startTrain(vector<DoubleVector> sample)
{
    int i, j, k, l, m, n;
    vector<DoubleVector> wx;  // weights between the input layer and the hidden layer
    vector<DoubleVector> wy;  // weights between the hidden layer and the output layer
    DoubleVector threX;       // thresholds of the hidden layer
    DoubleVector threY;       // thresholds of the output layer
    DoubleVector temp;
    // Initialize with random weights in [0, 0.8)
    srand(time(NULL));
    // Weights between the input layer and the hidden layer
    for (i = 0; i < HIDENUM; i++)
    {
        temp.clear();
        for (j = 0; j < DIMIN; j++)
            temp.push_back(getRandNum());
        wx.push_back(temp);
    }
    // Weights between the hidden layer and the output layer
    for (i = 0; i < DIMOUT; i++)
    {
        temp.clear();
        for (j = 0; j < HIDENUM; j++)
            temp.push_back(getRandNum());
        wy.push_back(temp);
    }
    // Thresholds of the hidden layer
    for (i = 0; i < HIDENUM; i++)
        threX.push_back(getRandNum());
    // Thresholds of the output layer
    for (i = 0; i < DIMOUT; i++)
        threY.push_back(getRandNum());
    // Training loop
    double sum;
    DoubleVector hideOut;    // hidden-layer outputs
    DoubleVector oOut;       // output-layer outputs
    double newE;             // accumulated squared error between desired and computed outputs
    DoubleVector outError;   // error terms of the output layer
    DoubleVector hideError;  // error terms of the hidden layer
    newE = 0;
    for (i = 0; i < LOOPNUM; i++)
    {
        newE = 0;
        for (j = 0; j < (int)sample.size(); j++)
        {
            outError.clear();
            hideError.clear();
            hideOut.clear();
            oOut.clear();
            // Forward pass: output of each hidden-layer node
            for (k = 0; k < HIDENUM; k++)
            {
                sum = 0;
                for (l = 0; l < DIMIN; l++)
                    sum += sample[j][l] * wx[k][l];
                hideOut.push_back(1 / (1 + exp(-(sum - threX[k]))));
            }
            // Forward pass: output of each output-layer node
            for (k = 0; k < DIMOUT; k++)
            {
                sum = 0;
                for (l = 0; l < HIDENUM; l++)
                    sum += hideOut[l] * wy[k][l];
                oOut.push_back(1 / (1 + exp(-(sum - threY[k]))));
            }
            // Accumulate the squared error between desired and computed outputs
            for (k = 0; k < DIMOUT; k++)
                newE += (sample[j][DIMIN + k] - oOut[k]) * (sample[j][DIMIN + k] - oOut[k]) / 2.0;
            // Error term of each output-layer node
            for (k = 0; k < DIMOUT; k++)
                outError.push_back((sample[j][DIMIN + k] - oOut[k]) * oOut[k] * (1 - oOut[k]));
            // Error term of each hidden-layer node
            for (k = 0; k < HIDENUM; k++)
            {
                sum = 0;
                for (l = 0; l < DIMOUT; l++)
                    sum += outError[l] * wy[l][k];
                hideError.push_back(sum * hideOut[k] * (1 - hideOut[k]));
            }
            // Correct the weights between the input layer and the hidden layer
            for (m = 0; m < HIDENUM; m++)
                for (n = 0; n < DIMIN; n++)
                    wx[m][n] = wx[m][n] + ALPHA * hideError[m] * sample[j][n];
            // Correct the weights between the hidden layer and the output layer
            for (m = 0; m < DIMOUT; m++)
                for (n = 0; n < HIDENUM; n++)
                    wy[m][n] = wy[m][n] + ALPHA * outError[m] * hideOut[n];
            // Correct the thresholds of the hidden layer
            for (m = 0; m < HIDENUM; m++)
                threX[m] = threX[m] - ALPHA * hideError[m];
            // Correct the thresholds of the output layer
            for (m = 0; m < DIMOUT; m++)
                threY[m] = threY[m] - ALPHA * outError[m];
        }
        // Stop when the average error is small enough
        if (newE / sample.size() < ERRORVAR)
        {
            printf("Training finished!\nTraining cycles: %d, error: %lf\n", i, newE);
            break;
        }
}
printf ("implied layer weight: \ n");
for (i=0; i{
For (j=0 j<dimin; j + +)
printf ("%lf", Wx[i][j]);
printf ("\ n");
}
printf ("\ n");
printf ("Output layer weight: \ n");
for (i=0; i{
For (j=0 j<dimout; j + +)
printf ("%lf", Wx[i][j]);
printf ("\ n");
}
USEBP (WX, WY);
}
// Use the trained BP network (the thresholds are passed in along with the
// weights so that the forward pass matches the one used during training)
void useBP(vector<DoubleVector> wx, vector<DoubleVector> wy,
           DoubleVector threX, DoubleVector threY)
{
    int i, j;
    double input[DIMIN];
    DoubleVector hideOut;
    DoubleVector oOut;
    double sum;
    while (1)
    {
        hideOut.clear();
        oOut.clear();
        printf("Enter data:\n");
        for (i = 0; i < DIMIN; i++)
            scanf("%lf", &input[i]);
        // Normalize the inputs with the min/max recorded from the training set
        for (i = 0; i < DIMIN; i++)
            input[i] = 0.002 + 0.996 * (input[i] - minSamp[i]) / (maxSamp[i] - minSamp[i]);
        // Forward pass: output of each hidden-layer node
        for (i = 0; i < HIDENUM; i++)
        {
            sum = 0;
            for (j = 0; j < DIMIN; j++)
                sum += input[j] * wx[i][j];
            hideOut.push_back(1 / (1 + exp(-(sum - threX[i]))));
        }
        // Forward pass: output of each output-layer node
        for (i = 0; i < DIMOUT; i++)
        {
            sum = 0;
            for (j = 0; j < HIDENUM; j++)
                sum += hideOut[j] * wy[i][j];
            oOut.push_back(1 / (1 + exp(-(sum - threY[i]))));
        }
        // Map the outputs back to the original scale
        for (i = 0; i < (int)oOut.size(); i++)
            oOut[i] = (oOut[i] - 0.002) * (maxSamp[DIMIN + i] - minSamp[DIMIN + i]) / 0.996 + minSamp[DIMIN + i];
        printf("Predicted result:\n");
        for (i = 0; i < (int)oOut.size(); i++)
            printf("%lf ", oOut[i]);
        printf("\n\n");
    }
}
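// Usage note: once training has converged, entering the four inputs of a known
// row (e.g. 114.6 1.1 0.71 85.0 from the first sample) should produce a
// prediction close to its recorded output of 346; rows held out of training
// give a fairer check of generalization.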
// Normalize the samples
vector<DoubleVector> normalization(vector<DoubleVector> sample)
{
    vector<DoubleVector> dst;
    int i, j;
    DoubleVector max(DIMNUM, 0);
    DoubleVector min(DIMNUM, INF);
    DoubleVector temp;
    // Find the minimum and maximum of every column
    for (i = 0; i < DIMNUM; i++)
        for (j = 0; j < (int)sample.size(); j++)
        {
            if (max[i] < sample[j][i])
                max[i] = sample[j][i];
            if (min[i] > sample[j][i])
                min[i] = sample[j][i];
        }
    minSamp = min;
    maxSamp = max;
    // Scale every column into (0.002, 0.998)
    for (i = 0; i < (int)sample.size(); i++)
    {
        temp.clear();
        for (j = 0; j < DIMNUM; j++)
            temp.push_back(0.002 + 0.996 * (sample[i][j] - min[j]) / (max[j] - min[j]));
        dst.push_back(temp);
    }
    return dst;
}
// Read the sample data from a file
vector<DoubleVector> getFileInf(char *file)
{
    int i = 1;
    vector<DoubleVector> dst;
    DoubleVector temp;
    double num;
    FILE *fp;
    fp = fopen(file, "r");
    if (fp == NULL)
    {
        printf("Open file error!\n");
        exit(0);
    }
    // Read numbers from the file, one sample per DIMNUM values
    while (fscanf(fp, "%lf", &num) != EOF)
    {
        temp.push_back(num);
        if (i % DIMNUM == 0)
        {
            dst.push_back(temp);
            temp.clear();
        }
        i++;
    }
    fclose(fp);
    return dst;
}
// Get a random number in [0, 0.8)
double getRandNum()
{
    double num;
    num = rand();
    num = 0.8 * num / RAND_MAX;
    return num;
}
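To try the program, save the listing as a C++ source file (the vector usage requires a C++ compiler, not a plain C one); assuming g++, a build along the lines of g++ bp.cpp -o bp, run in a directory containing BP.txt, should reproduce the training and the interactive prediction loop.

As a quick sanity check on the scaling, here is a minimal standalone sketch, separate from the program above and with illustrative helper names normalize and denormalize, showing that the inverse mapping used in useBP() must subtract the 0.002 offset before dividing by 0.996. The constants 280 and 495 are the minimum and maximum of the output column in the table above.

#include <stdio.h>

// normalize() maps [lo, hi] into (0.002, 0.998); denormalize() inverts it.
double normalize(double x, double lo, double hi)
{
    return 0.002 + 0.996 * (x - lo) / (hi - lo);
}

double denormalize(double y, double lo, double hi)
{
    return (y - 0.002) * (hi - lo) / 0.996 + lo;
}

int main()
{
    double x = 346.0;                       // first output value in the table
    double y = normalize(x, 280.0, 495.0);  // min/max of the output column
    printf("%f -> %f -> %f\n", x, y, denormalize(y, 280.0, 495.0));
    return 0;
}

Mapping into (0.002, 0.998) rather than all the way to [0, 1] keeps the targets away from the sigmoid's saturated extremes, where the derivative y(1-y) is nearly zero and weight updates stall.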