From 6ca8a31143f087f3bc470d39eb3c00156443802a Mon Sep 17 00:00:00 2001 From: 3gg <3gg@shellblade.net> Date: Thu, 23 Nov 2023 08:38:59 -0800 Subject: Formatting. --- src/lib/include/neuralnet/matrix.h | 15 ++- src/lib/include/neuralnet/neuralnet.h | 8 +- src/lib/include/neuralnet/train.h | 20 ++- src/lib/src/activation.h | 12 +- src/lib/src/matrix.c | 82 ++++++------ src/lib/src/neuralnet.c | 65 +++++----- src/lib/src/neuralnet_impl.h | 12 +- src/lib/src/train.c | 236 ++++++++++++++++++---------------- 8 files changed, 237 insertions(+), 213 deletions(-) diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h index 0cb40cf..b7281bf 100644 --- a/src/lib/include/neuralnet/matrix.h +++ b/src/lib/include/neuralnet/matrix.h @@ -33,7 +33,8 @@ void nnMatrixToArray(const nnMatrix* in, R* out); void nnMatrixRowToArray(const nnMatrix* in, int row, R* out); /// Copy a column from a source to a target matrix. -void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out); +void nnMatrixCopyCol( + const nnMatrix* in, nnMatrix* out, int col_in, int col_out); /// Mutable borrow of a matrix. nnMatrix nnMatrixBorrow(nnMatrix* in); @@ -56,20 +57,24 @@ void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out); /// /// This function multiples two matrices row-by-row instead of row-by-column. /// nnMatrixMul(A, B, O) == nnMatrixMulRows(A, B^T, O). -void nnMatrixMulRows(const nnMatrix* left, const nnMatrix* right, nnMatrix* out); +void nnMatrixMulRows( + const nnMatrix* left, const nnMatrix* right, nnMatrix* out); /// Matrix multiply-add. /// /// out = left + (right * scale) -void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out); +void nnMatrixMulAdd( + const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out); /// Matrix multiply-subtract. /// /// out = left - (right * scale) -void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out); +void nnMatrixMulSub( + const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out); /// Hadamard product of two matrices. -void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out); +void nnMatrixMulPairs( + const nnMatrix* left, const nnMatrix* right, nnMatrix* out); /// Add two matrices. void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out); diff --git a/src/lib/include/neuralnet/neuralnet.h b/src/lib/include/neuralnet/neuralnet.h index 1cf1c53..05c9406 100644 --- a/src/lib/include/neuralnet/neuralnet.h +++ b/src/lib/include/neuralnet/neuralnet.h @@ -5,7 +5,7 @@ typedef struct nnMatrix nnMatrix; typedef struct nnNeuralNetwork nnNeuralNetwork; -typedef struct nnQueryObject nnQueryObject; +typedef struct nnQueryObject nnQueryObject; /// Neuron activation. typedef enum nnActivation { @@ -15,7 +15,8 @@ typedef enum nnActivation { } nnActivation; /// Create a network. -nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations); +nnNeuralNetwork* nnMakeNet( + int num_layers, const int* layer_sizes, const nnActivation* activations); /// Delete the network and free its internal memory. void nnDeleteNet(nnNeuralNetwork**); @@ -36,7 +37,8 @@ void nnSetBiases(nnNeuralNetwork*, const R* biases); void nnQuery(const nnNeuralNetwork*, nnQueryObject*, const nnMatrix* input); /// Query the network, array version. 
-void nnQueryArray(const nnNeuralNetwork*, nnQueryObject*, const R* input, R* output); +void nnQueryArray( + const nnNeuralNetwork*, nnQueryObject*, const R* input, R* output); /// Create a query object. /// diff --git a/src/lib/include/neuralnet/train.h b/src/lib/include/neuralnet/train.h index 79f8e7b..6d811c2 100644 --- a/src/lib/include/neuralnet/train.h +++ b/src/lib/include/neuralnet/train.h @@ -14,18 +14,18 @@ typedef struct nnMatrix nnMatrix; /// activation with many inputs. Thus, a (0,1) initialization is really /// (0,scale), for example. typedef enum nnWeightInitStrategy { - nnWeightInit01, // (0,1) range. - nnWeightInit11, // (-1,+1) range. - nnWeightInitNormal, // Normal distribution. + nnWeightInit01, // (0,1) range. + nnWeightInit11, // (-1,+1) range. + nnWeightInitNormal, // Normal distribution. } nnWeightInitStrategy; /// Network training parameters. typedef struct nnTrainingParams { - R learning_rate; - int max_iterations; - uint64_t seed; + R learning_rate; + int max_iterations; + uint64_t seed; nnWeightInitStrategy weight_init; - bool debug; + bool debug; } nnTrainingParams; /// Train the network. @@ -36,7 +36,5 @@ typedef struct nnTrainingParams { /// |targets| is a matrix of targets, one row per target and as many columns as /// the target's dimension. void nnTrain( - nnNeuralNetwork*, - const nnMatrix* inputs, - const nnMatrix* targets, - const nnTrainingParams*); + nnNeuralNetwork*, const nnMatrix* inputs, const nnMatrix* targets, + const nnTrainingParams*); diff --git a/src/lib/src/activation.h b/src/lib/src/activation.h index 42ab73f..b56a69e 100644 --- a/src/lib/src/activation.h +++ b/src/lib/src/activation.h @@ -4,17 +4,13 @@ #include -static inline R sigmoid(R x) { - return 1. / (1. + exp(-x)); -} +static inline R sigmoid(R x) { return 1. / (1. 
+ exp(-x)); } -static inline R relu(R x) { - return fmax(0, x); -} +static inline R relu(R x) { return fmax(0, x); } #define NN_MAP_ARRAY(f, in, out, size) \ - for (int i = 0; i < size; ++i) { \ - out[i] = f(in[i]); \ + for (int i = 0; i < size; ++i) { \ + out[i] = f(in[i]); \ } #define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size) diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c index f937c01..174504f 100644 --- a/src/lib/src/matrix.c +++ b/src/lib/src/matrix.c @@ -8,10 +8,10 @@ nnMatrix nnMatrixMake(int rows, int cols) { R* values = calloc(rows * cols, sizeof(R)); assert(values != 0); - return (nnMatrix) { - .rows = rows, - .cols = cols, - .values = values, + return (nnMatrix){ + .rows = rows, + .cols = cols, + .values = values, }; } @@ -21,8 +21,8 @@ void nnMatrixDel(nnMatrix* matrix) { if (matrix->values != 0) { free(matrix->values); matrix->values = 0; - matrix->rows = 0; - matrix->cols = 0; + matrix->rows = 0; + matrix->cols = 0; } } @@ -30,12 +30,12 @@ void nnMatrixMove(nnMatrix* in, nnMatrix* out) { assert(in); assert(out); - out->rows = in->rows; - out->cols = in->cols; + out->rows = in->rows; + out->cols = in->cols; out->values = in->values; - in->rows = 0; - in->cols = 0; + in->rows = 0; + in->cols = 0; in->values = 0; } @@ -45,8 +45,8 @@ void nnMatrixCopy(const nnMatrix* in, nnMatrix* out) { assert(in->rows == out->rows); assert(in->cols == out->cols); - const R* in_value = in->values; - R* out_value = out->values; + const R* in_value = in->values; + R* out_value = out->values; for (int i = 0; i < in->rows * in->cols; ++i) { *out_value++ = *in_value++; @@ -73,7 +73,8 @@ void nnMatrixRowToArray(const nnMatrix* in, int row, R* out) { } } -void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out) { +void nnMatrixCopyCol( + const nnMatrix* in, nnMatrix* out, int col_in, int col_out) { assert(in); assert(out); assert(in->rows == out->rows); @@ -89,8 +90,8 @@ nnMatrix nnMatrixBorrow(nnMatrix* in) { assert(in); nnMatrix out; - out.rows = in->rows; - out.cols = in->cols; + out.rows = in->rows; + out.cols = in->cols; out.values = in->values; return out; } @@ -101,8 +102,8 @@ nnMatrix nnMatrixBorrowRows(nnMatrix* in, int row_start, int num_rows) { assert(row_start + num_rows <= in->rows); nnMatrix out; - out.rows = num_rows; - out.cols = in->cols; + out.rows = num_rows; + out.cols = in->cols; out.values = nnMatrixRow_mut(in, row_start); return out; } @@ -139,9 +140,9 @@ void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { const R* p_left_value = &left->values[i * left->cols]; for (int j = 0; j < left->cols; ++j) { - const R left_value = *p_left_value; + const R left_value = *p_left_value; const R* right_value = &right->values[j * right->cols]; - R* out_value = &out->values[i * out->cols]; + R* out_value = &out->values[i * out->cols]; for (int k = 0; k < right->cols; ++k) { *out_value++ += left_value * *right_value++; @@ -152,7 +153,8 @@ void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { } } -void nnMatrixMulRows(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { +void nnMatrixMulRows( + const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { assert(left != 0); assert(right != 0); assert(out != 0); @@ -165,7 +167,7 @@ void nnMatrixMulRows(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) R* out_value = out->values; for (int i = 0; i < left->rows; ++i) { - const R* left_row = &left->values[i * left->cols]; + const R* left_row = &left->values[i * left->cols]; 
const R* right_value = right->values; for (int j = 0; j < right->rows; ++j) { @@ -181,7 +183,8 @@ void nnMatrixMulRows(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) } } -void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) { +void nnMatrixMulAdd( + const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) { assert(left); assert(right); assert(out); @@ -190,16 +193,17 @@ void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatr assert(left->rows == out->rows); assert(left->cols == out->cols); - const R* left_value = left->values; + const R* left_value = left->values; const R* right_value = right->values; - R* out_value = out->values; + R* out_value = out->values; for (int i = 0; i < left->rows * left->cols; ++i) { *out_value++ = *left_value++ + *right_value++ * scale; } } -void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) { +void nnMatrixMulSub( + const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) { assert(left); assert(right); assert(out); @@ -208,16 +212,17 @@ void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatr assert(left->rows == out->rows); assert(left->cols == out->cols); - const R* left_value = left->values; + const R* left_value = left->values; const R* right_value = right->values; - R* out_value = out->values; + R* out_value = out->values; for (int i = 0; i < left->rows * left->cols; ++i) { *out_value++ = *left_value++ - *right_value++ * scale; } } -void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { +void nnMatrixMulPairs( + const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { assert(left != 0); assert(right != 0); assert(out != 0); @@ -226,9 +231,9 @@ void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out assert(left->rows == out->rows); assert(left->cols == out->cols); - R* left_value = left->values; + R* left_value = left->values; R* right_value = right->values; - R* out_value = out->values; + R* out_value = out->values; for (int i = 0; i < left->rows * left->cols; ++i) { *out_value++ = *left_value++ * *right_value++; @@ -244,9 +249,9 @@ void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { assert(left->rows == out->rows); assert(left->cols == out->cols); - const R* left_value = left->values; + const R* left_value = left->values; const R* right_value = right->values; - R* out_value = out->values; + R* out_value = out->values; for (int i = 0; i < left->rows * left->cols; ++i) { *out_value++ = *left_value++ + *right_value++; @@ -262,16 +267,17 @@ void nnMatrixSub(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { assert(left->rows == out->rows); assert(left->cols == out->cols); - const R* left_value = left->values; + const R* left_value = left->values; const R* right_value = right->values; - R* out_value = out->values; + R* out_value = out->values; for (int i = 0; i < left->rows * left->cols; ++i) { *out_value++ = *left_value++ - *right_value++; } } -void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out) { +void nnMatrixAddRow( + const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out) { assert(matrix); assert(row); assert(out); @@ -281,7 +287,7 @@ void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out) assert(matrix->cols == out->cols); const R* matrix_value = matrix->values; - R* out_value = out->values; + R* out_value = out->values; for (int i = 0; i < 
matrix->rows; ++i) { const R* row_value = row->values; @@ -320,8 +326,8 @@ void nnMatrixGt(const nnMatrix* in, R threshold, nnMatrix* out) { assert(in->rows == out->rows); assert(in->cols == out->cols); - const R* in_value = in->values; - R* out_value = out->values; + const R* in_value = in->values; + R* out_value = out->values; for (int i = 0; i < in->rows * in->cols; ++i) { *out_value++ = (*in_value++) > threshold ? 1 : 0; diff --git a/src/lib/src/neuralnet.c b/src/lib/src/neuralnet.c index cac611a..a5fc59b 100644 --- a/src/lib/src/neuralnet.c +++ b/src/lib/src/neuralnet.c @@ -1,13 +1,14 @@ #include -#include #include "activation.h" #include "neuralnet_impl.h" +#include #include #include -nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations) { +nnNeuralNetwork* nnMakeNet( + int num_layers, const int* layer_sizes, const nnActivation* activations) { assert(num_layers > 0); assert(layer_sizes); assert(activations); @@ -19,10 +20,10 @@ nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActiv net->num_layers = num_layers; - net->weights = calloc(num_layers, sizeof(nnMatrix)); - net->biases = calloc(num_layers, sizeof(nnMatrix)); + net->weights = calloc(num_layers, sizeof(nnMatrix)); + net->biases = calloc(num_layers, sizeof(nnMatrix)); net->activations = calloc(num_layers, sizeof(nnActivation)); - if ( (net->weights == 0) || (net->biases == 0) || (net->activations == 0) ) { + if ((net->weights == 0) || (net->biases == 0) || (net->activations == 0)) { nnDeleteNet(&net); return 0; } @@ -30,15 +31,15 @@ nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActiv for (int l = 0; l < num_layers; ++l) { // layer_sizes = { input layer size, first hidden layer size, ...} const int layer_input_size = layer_sizes[l]; - const int layer_output_size = layer_sizes[l+1]; + const int layer_output_size = layer_sizes[l + 1]; // We store the transpose of the weight matrix as written in textbooks. // Our vectors are row vectors and the matrices row-major. 
const int rows = layer_input_size; const int cols = layer_output_size; - net->weights[l] = nnMatrixMake(rows, cols); - net->biases[l] = nnMatrixMake(1, cols); + net->weights[l] = nnMatrixMake(rows, cols); + net->biases[l] = nnMatrixMake(1, cols); net->activations[l] = activations[l]; } @@ -46,7 +47,7 @@ nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActiv } void nnDeleteNet(nnNeuralNetwork** net) { - if ( (!net) || (!(*net)) ) { + if ((!net) || (!(*net))) { return; } if ((*net)->weights != 0) { @@ -77,7 +78,7 @@ void nnSetWeights(nnNeuralNetwork* net, const R* weights) { for (int l = 0; l < net->num_layers; ++l) { nnMatrix* layer_weights = &net->weights[l]; - R* layer_values = layer_weights->values; + R* layer_values = layer_weights->values; for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) { *layer_values++ = *weights++; @@ -91,7 +92,7 @@ void nnSetBiases(nnNeuralNetwork* net, const R* biases) { for (int l = 0; l < net->num_layers; ++l) { nnMatrix* layer_biases = &net->biases[l]; - R* layer_values = layer_biases->values; + R* layer_values = layer_biases->values; for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) { *layer_values++ = *biases++; @@ -99,7 +100,8 @@ void nnSetBiases(nnNeuralNetwork* net, const R* biases) { } } -void nnQuery(const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* input) { +void nnQuery( + const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* input) { assert(net); assert(query); assert(input); @@ -123,29 +125,34 @@ void nnQuery(const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* i // We could also rewrite the original Mul function to go row x row, // decomposing the multiplication. Preserving the original meaning of Mul // makes everything clearer. - nnMatrix output_vector = nnMatrixBorrowRows(&query->layer_outputs[l], i, 1); + nnMatrix output_vector = + nnMatrixBorrowRows(&query->layer_outputs[l], i, 1); nnMatrixMul(&input_vector, layer_weights, &output_vector); nnMatrixAddRow(&output_vector, layer_biases, &output_vector); switch (net->activations[l]) { - case nnIdentity: - break; // Nothing to do for the identity function. - case nnSigmoid: - sigmoid_array(output_vector.values, output_vector.values, output_vector.cols); - break; - case nnRelu: - relu_array(output_vector.values, output_vector.values, output_vector.cols); - break; - default: - assert(0); + case nnIdentity: + break; // Nothing to do for the identity function. + case nnSigmoid: + sigmoid_array( + output_vector.values, output_vector.values, output_vector.cols); + break; + case nnRelu: + relu_array( + output_vector.values, output_vector.values, output_vector.cols); + break; + default: + assert(0); } - input_vector = output_vector; // Borrow. + input_vector = output_vector; // Borrow. 
} } } -void nnQueryArray(const nnNeuralNetwork* net, nnQueryObject* query, const R* input, R* output) { +void nnQueryArray( + const nnNeuralNetwork* net, nnQueryObject* query, const R* input, + R* output) { assert(net); assert(query); assert(input); @@ -177,9 +184,9 @@ nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) { return 0; } for (int l = 0; l < net->num_layers; ++l) { - const nnMatrix* layer_weights = &net->weights[l]; - const int layer_output_size = nnLayerOutputSize(layer_weights); - query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size); + const nnMatrix* layer_weights = &net->weights[l]; + const int layer_output_size = nnLayerOutputSize(layer_weights); + query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size); } query->network_outputs = &query->layer_outputs[net->num_layers - 1]; @@ -187,7 +194,7 @@ nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) { } void nnDeleteQueryObject(nnQueryObject** query) { - if ( (!query) || (!(*query)) ) { + if ((!query) || (!(*query))) { return; } if ((*query)->layer_outputs != 0) { diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h index 26107b5..18694f4 100644 --- a/src/lib/src/neuralnet_impl.h +++ b/src/lib/src/neuralnet_impl.h @@ -14,10 +14,10 @@ /// /// w11 w12 w21 w22 typedef struct nnNeuralNetwork { - int num_layers; // Number of non-input layers (hidden + output). - nnMatrix* weights; // One matrix per non-input layer. - nnMatrix* biases; // One vector per non-input layer. - nnActivation* activations; // One per non-input layer. + int num_layers; // Number of non-input layers (hidden + output). + nnMatrix* weights; // One matrix per non-input layer. + nnMatrix* biases; // One vector per non-input layer. + nnActivation* activations; // One per non-input layer. } nnNeuralNetwork; /// A query object that holds all the memory necessary to query a network. @@ -31,6 +31,6 @@ typedef struct nnNeuralNetwork { /// convenience. typedef struct nnQueryObject { int num_layers; - nnMatrix* layer_outputs; // Output matrices, one output per layer. - nnMatrix* network_outputs; // Points to the last output matrix. + nnMatrix* layer_outputs; // Output matrices, one output per layer. + nnMatrix* network_outputs; // Points to the last output matrix. } nnTrainingQueryObject; diff --git a/src/lib/src/train.c b/src/lib/src/train.c index 3061a99..9244907 100644 --- a/src/lib/src/train.c +++ b/src/lib/src/train.c @@ -1,7 +1,7 @@ #include -#include #include "neuralnet_impl.h" +#include #include #include @@ -14,13 +14,13 @@ #define LOGD printf // If debug mode is requested, we will show progress every this many iterations. -static const int PROGRESS_THRESHOLD = 5; // % +static const int PROGRESS_THRESHOLD = 5; // % /// Computes the total MSE from the output error matrix. R ComputeMSE(const nnMatrix* errors) { - R sum_sq = 0; - const int N = errors->rows * errors->cols; - const R* value = errors->values; + R sum_sq = 0; + const int N = errors->rows * errors->cols; + const R* value = errors->values; for (int i = 0; i < N; ++i) { sum_sq += *value * *value; value++; @@ -30,7 +30,7 @@ R ComputeMSE(const nnMatrix* errors) { /// Holds the bits required to compute a sigmoid gradient. typedef struct nnSigmoidGradientElements { - nnMatrix ones; // A vector of just ones, same size as the layer. + nnMatrix ones; // A vector of just ones, same size as the layer. } nnSigmoidGradientElements; /// Holds the various elements required to compute gradients. 
These depend on @@ -49,7 +49,8 @@ typedef struct nnGradientElements { } nnGradientElements; // Initialize the network's weights randomly and set their biases to 0. -void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy strategy) { +void nnInitNet( + nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy strategy) { assert(net); mt19937_64 rng = mt19937_64_make(); @@ -60,41 +61,42 @@ void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy s nnMatrix* biases = &net->biases[l]; const R layer_size = (R)nnLayerInputSize(weights); - const R scale = 1. / layer_size; - const R stdev = 1. / sqrt((R)layer_size); - const R sigma = stdev * stdev; + const R scale = 1. / layer_size; + const R stdev = 1. / sqrt((R)layer_size); + const R sigma = stdev * stdev; R* value = weights->values; for (int k = 0; k < weights->rows * weights->cols; ++k) { switch (strategy) { - case nnWeightInit01: { - const R x01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. - *value++ = scale * x01; - break; - } - case nnWeightInit11: { - const R x11 = mt19937_64_gen_real4(&rng); // (-1, +1) interval. - *value++ = scale * x11; - break; + case nnWeightInit01: { + const R x01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. + *value++ = scale * x01; + break; + } + case nnWeightInit11: { + const R x11 = mt19937_64_gen_real4(&rng); // (-1, +1) interval. + *value++ = scale * x11; + break; + } + case nnWeightInitNormal: { + // Using initialization with a normal distribution of standard + // deviation 1 / sqrt(num_layer_weights) to prevent saturation when + // multiplying inputs. + const R u01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. + const R v01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. + R z0, z1; + normal2(u01, v01, &z0, &z1); + z0 = normal_transform(z0, /*mu=*/0, sigma); + z1 = normal_transform(z1, /*mu=*/0, sigma); + *value++ = z0; + if (k < weights->rows * weights->cols - 1) { + *value++ = z1; + ++k; } - case nnWeightInitNormal: - // Using initialization with a normal distribution of standard - // deviation 1 / sqrt(num_layer_weights) to prevent saturation when - // multiplying inputs. - const R u01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. - const R v01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. - R z0, z1; - normal2(u01, v01, &z0, &z1); - z0 = normal_transform(z0, /*mu=*/0, sigma); - z1 = normal_transform(z1, /*mu=*/0, sigma); - *value++ = z0; - if (k < weights->rows * weights->cols - 1) { - *value++ = z1; - ++k; - } - break; - default: - assert(false); + break; + } + default: + assert(false); } } @@ -112,9 +114,7 @@ void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy s // // For now, each iteration trains with one sample (row) at a time. void nnTrain( - nnNeuralNetwork* net, - const nnMatrix* inputs, - const nnMatrix* targets, + nnNeuralNetwork* net, const nnMatrix* inputs, const nnMatrix* targets, const nnTrainingParams* params) { assert(net); assert(inputs); @@ -129,34 +129,35 @@ void nnTrain( nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix)); // Allocate the weight transpose matrices up front for backpropagation. - //nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix)); + // nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix)); // Allocate the weight delta matrices. nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix)); // Allocate the data structures required to compute gradients. // This depends on each layer's activation type. 
- nnGradientElements* gradient_elems = calloc(net->num_layers, sizeof(nnGradientElements)); + nnGradientElements* gradient_elems = + calloc(net->num_layers, sizeof(nnGradientElements)); // Allocate the output transpose vectors for weight delta calculation. // This is one column vector per layer. nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix)); assert(errors != 0); - //assert(weights_T != 0); + // assert(weights_T != 0); assert(weight_deltas != 0); assert(gradient_elems); assert(outputs_T); for (int l = 0; l < net->num_layers; ++l) { - const nnMatrix* layer_weights = &net->weights[l]; - const int layer_output_size = net->weights[l].cols; - const nnActivation activation = net->activations[l]; + const nnMatrix* layer_weights = &net->weights[l]; + const int layer_output_size = net->weights[l].cols; + const nnActivation activation = net->activations[l]; errors[l] = nnMatrixMake(1, layer_weights->cols); - //weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows); - //nnMatrixTranspose(layer_weights, &weights_T[l]); + // weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows); + // nnMatrixTranspose(layer_weights, &weights_T[l]); weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols); @@ -164,21 +165,21 @@ void nnTrain( // Allocate the gradient elements and vectors for weight delta calculation. nnGradientElements* elems = &gradient_elems[l]; - elems->type = activation; + elems->type = activation; switch (activation) { - case nnIdentity: - break; // Gradient vector will be borrowed, no need to allocate. - - case nnSigmoid: - elems->gradient = nnMatrixMake(1, layer_output_size); - // Allocate the 1s vectors. - elems->sigmoid.ones = nnMatrixMake(1, layer_output_size); - nnMatrixInitConstant(&elems->sigmoid.ones, 1); - break; - - case nnRelu: - elems->gradient = nnMatrixMake(1, layer_output_size); - break; + case nnIdentity: + break; // Gradient vector will be borrowed, no need to allocate. + + case nnSigmoid: + elems->gradient = nnMatrixMake(1, layer_output_size); + // Allocate the 1s vectors. + elems->sigmoid.ones = nnMatrixMake(1, layer_output_size); + nnMatrixInitConstant(&elems->sigmoid.ones, 1); + break; + + case nnRelu: + elems->gradient = nnMatrixMake(1, layer_output_size); + break; } } @@ -195,9 +196,9 @@ void nnTrain( // If debug mode is requested, we will show progress every Nth iteration. const int progress_frame = - (params->max_iterations < PROGRESS_THRESHOLD) - ? 1 - : (params->max_iterations * PROGRESS_THRESHOLD / 100); + (params->max_iterations < PROGRESS_THRESHOLD) + ? 1 + : (params->max_iterations * PROGRESS_THRESHOLD / 100); // --- TRAIN @@ -209,8 +210,10 @@ void nnTrain( for (int sample = 0; sample < inputs->rows; ++sample) { // Slice the input and target matrices with the batch size. // We are not mutating the inputs, but we need the cast to borrow. - nnMatrix training_inputs = nnMatrixBorrowRows((nnMatrix*)inputs, sample, 1); - nnMatrix training_targets = nnMatrixBorrowRows((nnMatrix*)targets, sample, 1); + nnMatrix training_inputs = + nnMatrixBorrowRows((nnMatrix*)inputs, sample, 1); + nnMatrix training_targets = + nnMatrixBorrowRows((nnMatrix*)targets, sample, 1); // Will need the input transposed for backpropagation. // Assuming one training input per iteration for now. @@ -221,8 +224,10 @@ void nnTrain( // part of the derivative, -2(t-o). Also, we compute o-t instead to // remove that outer negative sign. 
nnQuery(net, query, &training_inputs); - //nnMatrixSub(&training_targets, training_outputs, &errors[net->num_layers - 1]); - nnMatrixSub(training_outputs, &training_targets, &errors[net->num_layers - 1]); + // nnMatrixSub(&training_targets, training_outputs, + // &errors[net->num_layers - 1]); + nnMatrixSub( + training_outputs, &training_targets, &errors[net->num_layers - 1]); // Update outputs_T, which we need during weight updates. for (int l = 0; l < net->num_layers; ++l) { @@ -232,12 +237,12 @@ void nnTrain( // Update weights and biases for each internal layer, backpropagating // errors along the way. for (int l = net->num_layers - 1; l >= 0; --l) { - const nnMatrix* layer_output = &query->layer_outputs[l]; - nnMatrix* layer_weights = &net->weights[l]; - nnMatrix* layer_biases = &net->biases[l]; - nnGradientElements* elems = &gradient_elems[l]; - nnMatrix* gradient = &elems->gradient; - const nnActivation activation = net->activations[l]; + const nnMatrix* layer_output = &query->layer_outputs[l]; + nnMatrix* layer_weights = &net->weights[l]; + nnMatrix* layer_biases = &net->biases[l]; + nnGradientElements* elems = &gradient_elems[l]; + nnMatrix* gradient = &elems->gradient; + const nnActivation activation = net->activations[l]; // Compute the gradient (the part of the expression that does not // contain the output of the previous layer). @@ -246,55 +251,58 @@ void nnTrain( // Sigmoid: G = error_k * output_k * (1 - output_k). // Relu: G = error_k * (output_k > 0 ? 1 : 0) switch (activation) { - case nnIdentity: - // TODO: Just copy the pointer? - *gradient = nnMatrixBorrow(&errors[l]); - break; - case nnSigmoid: - nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); - nnMatrixMulPairs(layer_output, gradient, gradient); - nnMatrixMulPairs(&errors[l], gradient, gradient); - break; - case nnRelu: - nnMatrixGt(layer_output, 0, gradient); - nnMatrixMulPairs(&errors[l], gradient, gradient); - break; + case nnIdentity: + // TODO: Just copy the pointer? + *gradient = nnMatrixBorrow(&errors[l]); + break; + case nnSigmoid: + nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); + nnMatrixMulPairs(layer_output, gradient, gradient); + nnMatrixMulPairs(&errors[l], gradient, gradient); + break; + case nnRelu: + nnMatrixGt(layer_output, 0, gradient); + nnMatrixMulPairs(&errors[l], gradient, gradient); + break; } // Outer product to compute the weight deltas. - const nnMatrix* output_T = (l == 0) ? &training_inputs_T : &outputs_T[l-1]; + const nnMatrix* output_T = + (l == 0) ? &training_inputs_T : &outputs_T[l - 1]; nnMatrixMul(output_T, gradient, &weight_deltas[l]); // Backpropagate the error before updating weights. if (l > 0) { // G * W^T == G *^T W. - //nnMatrixMul(gradient, &weights_T[l], &errors[l-1]); - nnMatrixMulRows(gradient, layer_weights, &errors[l-1]); + // nnMatrixMul(gradient, &weights_T[l], &errors[l-1]); + nnMatrixMulRows(gradient, layer_weights, &errors[l - 1]); } // Update weights. nnMatrixScale(&weight_deltas[l], params->learning_rate); // The gradient has a negative sign from -(t - o), but we have computed // e = o - t instead, so we can subtract directly. - //nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights); + // nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights); nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); // Update weight transpose matrix for the next training iteration. - //nnMatrixTranspose(layer_weights, &weights_T[l]); + // nnMatrixTranspose(layer_weights, &weights_T[l]); // Update biases. 
// This is the same formula as for weights, except that the o_j term is // just 1. We can simply re-use the gradient that we have already // computed for the weight update. - //nnMatrixMulAdd(layer_biases, gradient, params->learning_rate, layer_biases); - nnMatrixMulSub(layer_biases, gradient, params->learning_rate, layer_biases); + // nnMatrixMulAdd(layer_biases, gradient, params->learning_rate, + // layer_biases); + nnMatrixMulSub( + layer_biases, gradient, params->learning_rate, layer_biases); } // TODO: Add this under a verbose debugging mode. // if (params->debug) { - // LOGD("Iter: %d, Sample: %d, Error: %f\n", iter, sample, ComputeMSE(&errors[net->num_layers - 1])); - // LOGD("TGT: "); - // for (int i = 0; i < training_targets.cols; ++i) { + // LOGD("Iter: %d, Sample: %d, Error: %f\n", iter, sample, + // ComputeMSE(&errors[net->num_layers - 1])); LOGD("TGT: "); for (int i + // = 0; i < training_targets.cols; ++i) { // printf("%.3f ", training_targets.values[i]); // } // printf("\n"); @@ -307,42 +315,44 @@ void nnTrain( } if (params->debug && ((iter % progress_frame) == 0)) { - LOGD("Iter: %d/%d, Error: %f\n", - iter, params->max_iterations, ComputeMSE(&errors[net->num_layers - 1])); + LOGD( + "Iter: %d/%d, Error: %f\n", iter, params->max_iterations, + ComputeMSE(&errors[net->num_layers - 1])); } } // Print the final error. if (params->debug) { - LOGD("Iter: %d/%d, Error: %f\n", - params->max_iterations, params->max_iterations, ComputeMSE(&errors[net->num_layers - 1])); + LOGD( + "Iter: %d/%d, Error: %f\n", params->max_iterations, + params->max_iterations, ComputeMSE(&errors[net->num_layers - 1])); } for (int l = 0; l < net->num_layers; ++l) { nnMatrixDel(&errors[l]); nnMatrixDel(&outputs_T[l]); - //nnMatrixDel(&weights_T[l]); + // nnMatrixDel(&weights_T[l]); nnMatrixDel(&weight_deltas[l]); nnGradientElements* elems = &gradient_elems[l]; switch (elems->type) { - case nnIdentity: - break; // Gradient vector is borrowed, no need to deallocate. + case nnIdentity: + break; // Gradient vector is borrowed, no need to deallocate. - case nnSigmoid: - nnMatrixDel(&elems->gradient); - nnMatrixDel(&elems->sigmoid.ones); - break; + case nnSigmoid: + nnMatrixDel(&elems->gradient); + nnMatrixDel(&elems->sigmoid.ones); + break; - case nnRelu: - nnMatrixDel(&elems->gradient); - break; + case nnRelu: + nnMatrixDel(&elems->gradient); + break; } } nnMatrixDel(&training_inputs_T); free(errors); free(outputs_T); - //free(weights_T); + // free(weights_T); free(weight_deltas); free(gradient_elems); } -- cgit v1.2.3
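For context, a minimal usage sketch of the public API touched by this patch (nnMakeNet, nnTrain, nnMakeQueryObject, nnQueryArray). It is not part of the commit: the <neuralnet/...> include paths, the R scalar typedef, and the XOR data/hyperparameters are assumptions for illustration only.

// Hedged sketch: train a 2-4-1 sigmoid network on XOR with the API declared
// in matrix.h, neuralnet.h and train.h above. Include paths and R are assumed.
#include <neuralnet/matrix.h>
#include <neuralnet/neuralnet.h>
#include <neuralnet/train.h>

#include <stdbool.h>
#include <stdio.h>

int main(void) {
  // layer_sizes = { input size, hidden size, output size };
  // num_layers counts the non-input layers, as documented in neuralnet_impl.h.
  const int          layer_sizes[] = {2, 4, 1};
  const nnActivation activations[] = {nnSigmoid, nnSigmoid};
  nnNeuralNetwork* net = nnMakeNet(/*num_layers=*/2, layer_sizes, activations);

  // One sample per row, as nnTrain() expects.
  nnMatrix inputs  = nnMatrixMake(4, 2);
  nnMatrix targets = nnMatrixMake(4, 1);
  const R xor_in[]  = {0, 0,  0, 1,  1, 0,  1, 1};
  const R xor_out[] = {0, 1, 1, 0};
  for (int i = 0; i < 8; ++i) { inputs.values[i]  = xor_in[i]; }
  for (int i = 0; i < 4; ++i) { targets.values[i] = xor_out[i]; }

  const nnTrainingParams params = {
      .learning_rate  = 0.7,
      .max_iterations = 10000,
      .seed           = 42,
      .weight_init    = nnWeightInitNormal,
      .debug          = false,
  };
  nnTrain(net, &inputs, &targets, &params);

  // Query a single input through the array convenience wrapper.
  nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1);
  const R probe[] = {1, 0};
  R       out;
  nnQueryArray(net, query, probe, &out);
  printf("XOR(1,0) ~= %f\n", (double)out);

  nnDeleteQueryObject(&query);
  nnMatrixDel(&inputs);
  nnMatrixDel(&targets);
  nnDeleteNet(&net);
  return 0;
}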
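The backpropagation switch in train.c assembles the per-layer gradient with nnMatrixSub, nnMatrixMulPairs and nnMatrixGt. A scalar view of the same formulas from the comments in the diff (G = error_k; G = error_k * output_k * (1 - output_k); G = error_k * (output_k > 0 ? 1 : 0)); the helper name below is illustrative, not a library function.

// Scalar view of the gradient element computed per output k in train.c.
// gradient_element() is an illustrative name only.
static R gradient_element(nnActivation activation, R error, R output) {
  switch (activation) {
  case nnIdentity:
    return error;                          // G = error_k
  case nnSigmoid:
    return error * output * (1 - output);  // G = error_k * output_k * (1 - output_k)
  case nnRelu:
    return error * (output > 0 ? 1 : 0);   // G = error_k * (output_k > 0 ? 1 : 0)
  }
  return 0;
}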
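The weight and bias updates at the end of the backprop loop are plain gradient descent: because the error is computed as e = o - t (rather than t - o), the sign folds into a subtraction, which is what nnMatrixSub and nnMatrixMulSub express. A sketch of that per-layer step, factored into a helper for illustration (apply_layer_update is not a library function; the matrix calls and their signatures are the ones declared in matrix.h above).

// Per-layer update from the training loop, as a standalone sketch.
// Implements: weights -= lr * (output_T x gradient); biases -= lr * gradient.
#include <neuralnet/matrix.h>

static void apply_layer_update(
    nnMatrix* weights, nnMatrix* biases, const nnMatrix* output_T,
    const nnMatrix* gradient, R learning_rate) {
  nnMatrix weight_delta = nnMatrixMake(weights->rows, weights->cols);

  // Outer product of the previous layer's (transposed) output with the
  // gradient, then scale by the learning rate: same calls as train.c.
  nnMatrixMul(output_T, gradient, &weight_delta);
  nnMatrixScale(&weight_delta, learning_rate);

  // e = o - t was used for the error, so the descent step subtracts.
  nnMatrixSub(weights, &weight_delta, weights);
  // Bias update: nnMatrixMulSub computes out = left - (right * scale).
  nnMatrixMulSub(biases, gradient, learning_rate, biases);

  nnMatrixDel(&weight_delta);
}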