/// <summary>
/// Runs a learning epoch.
/// </summary>
/// 
/// <param name="input">Array of input vectors.</param>
/// <param name="output">Array of output vectors.</param>
/// 
/// <returns>Returns the sum of squared learning errors for the epoch.</returns>
/// 
/// <remarks><para>The method runs one learning epoch, performing as many
/// iterations of the Levenberg-Marquardt method as needed to achieve
/// a decrease in error.</para></remarks>
///
public double RunEpoch(double[][] input, double[][] output)
{
    // Initial definitions and memory allocations
    int N = input.Length;
    this.errors = new double[N];
    this.jacobian = new double[N, numberOfParameters];
    LuDecomposition decomposition = null;
    double sumOfSquaredErrors = 0.0;
    double sumOfSquaredWeights = 0.0;
    double trace = 0.0;
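
    // The Jacobian is an N-by-numberOfParameters matrix whose entry [i, j]
    // holds the partial derivative of the i-th residual with respect to the
    // j-th network parameter. It can be computed exactly by backpropagation
    // (chain rule) or approximated by finite differences; both routines also
    // return the current sum of squared errors.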
    // Compute the Jacobian matrix
    if (method == JacobianMethod.ByBackpropagation)
    {
        sumOfSquaredErrors = JacobianByChainRule(input, output);
    }
    else
    {
        sumOfSquaredErrors = JacobianByFiniteDifference(input, output);
    }
    // Create the initial weights vector
    sumOfSquaredWeights = CreateWeights();
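
    // Gauss-Newton approximation: the gradient is g = J'e and the
    // quasi-Hessian is J'J (scaled here by beta for the Bayesian
    // regularization objective), so no second derivatives are needed.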
    // Compute the gradient and the quasi-Hessian
    for (int i = 0; i < numberOfParameters; i++)
    {
        // Compute the i-th gradient entry (i-th column of J' times the error vector)
        double s = 0.0;
        for (int j = 0; j < N; j++)
        {
            s += jacobian[j, i] * errors[j];
        }
        gradient[i] = s;

        // Compute the quasi-Hessian matrix using the Jacobian (H = J'J)
        for (int j = 0; j < numberOfParameters; j++)
        {
            double c = 0.0;
            for (int k = 0; k < N; k++)
            {
                c += jacobian[k, i] * jacobian[k, j];
            }
            hessian[i, j] = beta * c;
        }
    }
    // Store the Hessian diagonal for future computations
    for (int i = 0; i < numberOfParameters; i++)
        diagonal[i] = hessian[i, i];
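
    // The diagonal is saved because the loop below overwrites each
    // hessian[i, i] with a damped value on every attempt.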
    // Define the Bayesian regularization objective function
    double objective = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;
    double current = objective + 1.0;
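
    // current starts above objective, so the loop below runs at least once.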
    // Beginning of the main Levenberg-Marquardt loop:
    lambda /= v;

    // We'll try to find a direction with less error
    // (or where the objective function is smaller)
    while (current >= objective && lambda < lambdaMax)
    {
        lambda *= v;

        // Update the diagonal (Levenberg-Marquardt formula)
        for (int i = 0; i < numberOfParameters; i++)
            hessian[i, i] = diagonal[i] + (lambda + alpha);
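
        // The system solved below is (H + (lambda + alpha) I) delta = g:
        // a large lambda biases the step toward gradient descent, a small
        // lambda toward the Gauss-Newton step.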
        // Decompose to solve the linear system
        decomposition = new LuDecomposition(hessian);

        // Check if the damped Hessian has become singular
        if (!decomposition.NonSingular) continue;

        // Solve the linear system using the LU decomposition
        deltas = decomposition.Solve(gradient);
        // Update weights using the calculated deltas
        sumOfSquaredWeights = UpdateWeights();

        // Calculate the new error
        sumOfSquaredErrors = 0.0;
        for (int i = 0; i < N; i++)
        {
            network.Compute(input[i]);
            sumOfSquaredErrors += CalculateError(output[i]);
        }

        // Update the objective function
        current = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;
        // If the objective function is larger than before, the step is
        // rejected and tried again with a greater damping factor.
    }

    // If this iteration caused an error drop, then the next iteration
    // will use a smaller damping factor.
    lambda /= v;
    // If we are using Bayesian regularization, we need to
    // update the Bayesian hyperparameters alpha and beta
    if (useBayesianRegularization)
    {
        // References:
        // - http://www-alg.ist.hokudai.ac.jp/~jan/alpha.pdf
        // - http://www.inference.phy.cam.ac.uk/mackay/Bayes_FAQ.html

        // Compute the trace of the inverse Hessian
        trace = Matrix.Trace(decomposition.Inverse());

        // Poland's update formula:
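        // gamma estimates the effective number of well-determined
        // parameters in the network.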
        gamma = numberOfParameters - (alpha * trace);
        alpha = numberOfParameters / (2.0 * sumOfSquaredWeights + trace);
        beta = System.Math.Abs((N - gamma) / (2.0 * sumOfSquaredErrors));
        //beta = (N - gama) / (2.0 * sumOfSquaredErrors);

        // Original MacKay's update formula:
        // gama = (double)networkParameters - (alpha * trace);
        // alpha = gama / (2.0 * sumOfSquaredWeights);
        // beta = (gama - N) / (2.0 * sumOfSquaredErrors);
    }
    return sumOfSquaredErrors;
}