diff options
author | jeanne <jeanne@localhost.localdomain> | 2022-05-11 09:54:38 -0700 |
---|---|---|
committer | jeanne <jeanne@localhost.localdomain> | 2022-05-11 09:54:38 -0700 |
commit | 411f66a2540fa17c736116d865e0ceb0cfe5623b (patch) | |
tree | fa92c69ec627642c8452f928798ff6eccd24ddd6 | |
parent | 7705b07456dfd4b89c272613e98eda36cc787254 (diff) |
Initial commit.
-rw-r--r-- | CMakeLists.txt | 6 | ||||
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | src/bin/CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/bin/mnist/CMakeLists.txt | 11 | ||||
-rw-r--r-- | src/bin/mnist/src/main.c | 473 | ||||
-rw-r--r-- | src/lib/CMakeLists.txt | 37 | ||||
-rw-r--r-- | src/lib/include/neuralnet/matrix.h | 111 | ||||
-rw-r--r-- | src/lib/include/neuralnet/neuralnet.h | 64 | ||||
-rw-r--r-- | src/lib/include/neuralnet/train.h | 42 | ||||
-rw-r--r-- | src/lib/include/neuralnet/types.h | 3 | ||||
-rw-r--r-- | src/lib/src/activation.h | 21 | ||||
-rw-r--r-- | src/lib/src/matrix.c | 298 | ||||
-rw-r--r-- | src/lib/src/neuralnet.c | 228 | ||||
-rw-r--r-- | src/lib/src/neuralnet_impl.h | 36 | ||||
-rw-r--r-- | src/lib/src/train.c | 346 | ||||
-rw-r--r-- | src/lib/test/matrix_test.c | 350 | ||||
-rw-r--r-- | src/lib/test/neuralnet_test.c | 92 | ||||
-rw-r--r-- | src/lib/test/test.h | 185 | ||||
-rw-r--r-- | src/lib/test/test_main.c | 3 | ||||
-rw-r--r-- | src/lib/test/test_util.h | 22 | ||||
-rw-r--r-- | src/lib/test/train_linear_perceptron_non_origin_test.c | 67 | ||||
-rw-r--r-- | src/lib/test/train_linear_perceptron_test.c | 62 | ||||
-rw-r--r-- | src/lib/test/train_sigmoid_test.c | 66 | ||||
-rw-r--r-- | src/lib/test/train_xor_test.c | 66 |
24 files changed, 2593 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..a060fab --- /dev/null +++ b/CMakeLists.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | cmake_minimum_required(VERSION 3.0) | ||
2 | |||
3 | project(neuralnet) | ||
4 | |||
5 | add_subdirectory(src/lib) | ||
6 | add_subdirectory(src/bin) | ||
@@ -1,7 +1,7 @@ | |||
1 | GNU AFFERO GENERAL PUBLIC LICENSE | 1 | GNU AFFERO GENERAL PUBLIC LICENSE |
2 | Version 3, 19 November 2007 | 2 | Version 3, 19 November 2007 |
3 | 3 | ||
4 | Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> | 4 | Copyright (C) 2022 Marc Sunet <https://shellblade.net/> |
5 | 5 | ||
6 | Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. | 6 | Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. |
7 | 7 | ||
diff --git a/src/bin/CMakeLists.txt b/src/bin/CMakeLists.txt new file mode 100644 index 0000000..051a56f --- /dev/null +++ b/src/bin/CMakeLists.txt | |||
@@ -0,0 +1,3 @@ | |||
1 | cmake_minimum_required(VERSION 3.0) | ||
2 | |||
3 | add_subdirectory(mnist) | ||
diff --git a/src/bin/mnist/CMakeLists.txt b/src/bin/mnist/CMakeLists.txt new file mode 100644 index 0000000..a6c54f2 --- /dev/null +++ b/src/bin/mnist/CMakeLists.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | cmake_minimum_required(VERSION 3.0) | ||
2 | |||
3 | add_executable(mnist | ||
4 | src/main.c) | ||
5 | |||
6 | target_link_libraries(mnist PRIVATE | ||
7 | neuralnet | ||
8 | bsd | ||
9 | z) | ||
10 | |||
11 | target_compile_options(mnist PRIVATE -Wall -Wextra) | ||
diff --git a/src/bin/mnist/src/main.c b/src/bin/mnist/src/main.c new file mode 100644 index 0000000..4d268ac --- /dev/null +++ b/src/bin/mnist/src/main.c | |||
@@ -0,0 +1,473 @@ | |||
1 | #include <neuralnet/matrix.h> | ||
2 | #include <neuralnet/neuralnet.h> | ||
3 | #include <neuralnet/train.h> | ||
4 | |||
5 | #include <zlib.h> | ||
6 | |||
7 | #include <assert.h> | ||
8 | #include <bsd/string.h> | ||
9 | #include <linux/limits.h> | ||
10 | #include <math.h> | ||
11 | #include <stdbool.h> | ||
12 | #include <stdint.h> | ||
13 | #include <stdio.h> | ||
14 | #include <stdlib.h> | ||
15 | |||
16 | static const int TRAIN_ITERATIONS = 100; | ||
17 | |||
18 | static const int32_t IMAGE_FILE_MAGIC = 0x00000803; | ||
19 | static const int32_t LABEL_FILE_MAGIC = 0x00000801; | ||
20 | |||
21 | // Inputs of 0 cancel weights during training. This value is used to rescale the | ||
22 | // input pixels from [0,255] to [PIXEL_LOWER_BOUND, 1.0]. | ||
23 | static const double PIXEL_LOWER_BOUND = 0.01; | ||
24 | |||
25 | // Scale the outputs to (0,1) since the sigmoid cannot produce 0 or 1. | ||
26 | static const double LABEL_LOWER_BOUND = 0.01; | ||
27 | static const double LABEL_UPPER_BOUND = 0.99; | ||
28 | |||
29 | // Epsilon used to compare R values. | ||
30 | static const double EPS = 1e-10; | ||
31 | |||
32 | #define min(a,b) ((a) < (b) ? (a) : (b)) | ||
33 | |||
34 | typedef struct ImageSet { | ||
35 | nnMatrix images; // Images flattened into row vectors of the matrix. | ||
36 | nnMatrix labels; // One-hot-encoded labels. | ||
37 | int count; // Number of images and labels. | ||
38 | int rows; // Rows in an image. | ||
39 | int cols; // Columns in an image. | ||
40 | } ImageSet; | ||
41 | |||
/// Print the command-line usage message to stderr.
///
/// |argv0| is the program name shown in the message.
static void usage(const char* argv0) {
  fprintf(
      stderr,
      "Usage: %s <path to mnist files directory> [num images]\n"
      "\n"
      " Use -1 for [num images] to use all the images in the data set\n",
      argv0);
}
47 | |||
48 | static bool R_eq(R a, R b) { | ||
49 | return fabs(a-b) <= EPS; | ||
50 | } | ||
51 | |||
52 | static void PrintImage(const nnMatrix* images, int rows, int cols, int image_index) { | ||
53 | assert(images); | ||
54 | assert((0 <= image_index) && (image_index < images->rows)); | ||
55 | |||
56 | // Top line. | ||
57 | for (int j = 0; j < cols/2; ++j) { | ||
58 | printf(" -"); | ||
59 | } | ||
60 | printf("\n"); | ||
61 | |||
62 | // Image. | ||
63 | const R* value = nnMatrixRow(images, image_index); | ||
64 | for (int i = 0; i < rows; ++i) { | ||
65 | printf("|"); | ||
66 | for (int j = 0; j < cols; ++j) { | ||
67 | if (*value > 0.8) { | ||
68 | printf("#"); | ||
69 | } else if (*value > 0.5) { | ||
70 | printf("*"); | ||
71 | } | ||
72 | else if (*value > PIXEL_LOWER_BOUND) { | ||
73 | printf(":"); | ||
74 | } else if (*value == 0.0) { | ||
75 | // Values should not be exactly 0, otherwise they cancel out weights | ||
76 | // during training. | ||
77 | printf("X"); | ||
78 | } else { | ||
79 | printf(" "); | ||
80 | } | ||
81 | value++; | ||
82 | } | ||
83 | printf("|\n"); | ||
84 | } | ||
85 | |||
86 | // Bottom line. | ||
87 | for (int j = 0; j < cols/2; ++j) { | ||
88 | printf(" -"); | ||
89 | } | ||
90 | printf("\n"); | ||
91 | } | ||
92 | |||
93 | static void PrintLabel(const nnMatrix* labels, int label_index) { | ||
94 | assert(labels); | ||
95 | assert((0 <= label_index) && (label_index < labels->rows)); | ||
96 | |||
97 | // Compute the label from the one-hot encoding. | ||
98 | const R* value = nnMatrixRow(labels, label_index); | ||
99 | int label = -1; | ||
100 | for (int i = 0; i < 10; ++i) { | ||
101 | if (R_eq(*value++, LABEL_UPPER_BOUND)) { | ||
102 | label = i; | ||
103 | break; | ||
104 | } | ||
105 | } | ||
106 | assert((0 <= label) && (label <= 9)); | ||
107 | |||
108 | printf("Label: %d ( ", label); | ||
109 | value = nnMatrixRow(labels, label_index); | ||
110 | for (int i = 0; i < 10; ++i) { | ||
111 | printf("%.3f ", *value++); | ||
112 | } | ||
113 | printf(")\n"); | ||
114 | } | ||
115 | |||
116 | static R lerp(R a, R b, R t) { | ||
117 | return a + t*(b-a); | ||
118 | } | ||
119 | |||
120 | /// Rescales a pixel from [0,255] to [PIXEL_LOWER_BOUND, 1.0]. | ||
121 | static R FormatPixel(uint8_t pixel) { | ||
122 | const R value = (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND; | ||
123 | assert(value >= PIXEL_LOWER_BOUND); | ||
124 | assert(value <= 1.0); | ||
125 | return value; | ||
126 | } | ||
127 | |||
128 | /// Rescales a one-hot-encoded label value to (0,1). | ||
129 | static R FormatLabel(R label) { | ||
130 | const R value = lerp(LABEL_LOWER_BOUND, LABEL_UPPER_BOUND, label); | ||
131 | assert(value > 0.0); | ||
132 | assert(value < 1.0); | ||
133 | return value; | ||
134 | } | ||
135 | |||
/// Reverse the byte order of a 32-bit integer.
///
/// The swap is carried out on an unsigned copy of |x|: left-shifting a byte
/// into the sign bit of a signed 32-bit integer is undefined behavior.
static int32_t ReverseEndian32(int32_t x) {
  const uint32_t u = (uint32_t)x;
  const uint32_t swapped =
      ((u & 0x000000ffu) << 24) |
      ((u & 0x0000ff00u) << 8) |
      ((u & 0x00ff0000u) >> 8) |
      ((u & 0xff000000u) >> 24);
  return (int32_t)swapped;
}
143 | |||
144 | static void ImageToMatrix( | ||
145 | const uint8_t* pixels, int num_pixels, int row, nnMatrix* images) { | ||
146 | assert(pixels); | ||
147 | assert(images); | ||
148 | |||
149 | for (int i = 0; i < num_pixels; ++i) { | ||
150 | const R pixel = FormatPixel(pixels[i]); | ||
151 | nnMatrixSet(images, row, i, pixel); | ||
152 | } | ||
153 | } | ||
154 | |||
155 | static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_set) { | ||
156 | assert(images_file != Z_NULL); | ||
157 | assert(image_set); | ||
158 | |||
159 | bool success = false; | ||
160 | |||
161 | uint8_t* pixels = 0; | ||
162 | |||
163 | int32_t magic, total_images, rows, cols; | ||
164 | if ( (gzread(images_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) || | ||
165 | (gzread(images_file, (char*)&total_images, sizeof(int32_t)) != sizeof(int32_t)) || | ||
166 | (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) || | ||
167 | (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t)) ) { | ||
168 | fprintf(stderr, "Failed to read header\n"); | ||
169 | goto cleanup; | ||
170 | } | ||
171 | |||
172 | magic = ReverseEndian32(magic); | ||
173 | total_images = ReverseEndian32(total_images); | ||
174 | rows = ReverseEndian32(rows); | ||
175 | cols = ReverseEndian32(cols); | ||
176 | |||
177 | if (magic != IMAGE_FILE_MAGIC) { | ||
178 | fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n", | ||
179 | magic, IMAGE_FILE_MAGIC); | ||
180 | goto cleanup; | ||
181 | } | ||
182 | |||
183 | printf("Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n", | ||
184 | magic, total_images, rows, cols); | ||
185 | |||
186 | total_images = max_num_images >= 0 ? min(total_images, max_num_images) : total_images; | ||
187 | |||
188 | // Images are flattened into single row vectors. | ||
189 | const int num_pixels = rows * cols; | ||
190 | image_set->images = nnMatrixMake(total_images, num_pixels); | ||
191 | image_set->count = total_images; | ||
192 | image_set->rows = rows; | ||
193 | image_set->cols = cols; | ||
194 | |||
195 | pixels = calloc(1, num_pixels); | ||
196 | if (!pixels) { | ||
197 | fprintf(stderr, "Failed to allocate image buffer\n"); | ||
198 | goto cleanup; | ||
199 | } | ||
200 | |||
201 | for (int i = 0; i < total_images; ++i) { | ||
202 | const int bytes_read = gzread(images_file, pixels, num_pixels); | ||
203 | if (bytes_read < num_pixels) { | ||
204 | fprintf(stderr, "Failed to read image %d\n", i); | ||
205 | goto cleanup; | ||
206 | } | ||
207 | ImageToMatrix(pixels, num_pixels, i, &image_set->images); | ||
208 | } | ||
209 | |||
210 | success = true; | ||
211 | |||
212 | cleanup: | ||
213 | if (pixels) { | ||
214 | free(pixels); | ||
215 | } | ||
216 | if (!success) { | ||
217 | nnMatrixDel(&image_set->images); | ||
218 | } | ||
219 | return success; | ||
220 | } | ||
221 | |||
222 | static void OneHotEncode(const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) { | ||
223 | assert(labels_bytes); | ||
224 | assert(labels); | ||
225 | assert(labels->rows == num_labels); | ||
226 | assert(labels->cols == 10); | ||
227 | |||
228 | static const R one_hot[10][10] = { | ||
229 | { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, | ||
230 | { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, | ||
231 | { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, | ||
232 | { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, | ||
233 | { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }, | ||
234 | { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, | ||
235 | { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }, | ||
236 | { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, | ||
237 | { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }, | ||
238 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, | ||
239 | }; | ||
240 | |||
241 | R* value = labels->values; | ||
242 | |||
243 | for (int i = 0; i < num_labels; ++i) { | ||
244 | const uint8_t label = labels_bytes[i]; | ||
245 | const R* one_hot_value = one_hot[label]; | ||
246 | |||
247 | for (int j = 0; j < 10; ++j) { | ||
248 | *value++ = FormatLabel(*one_hot_value++); | ||
249 | } | ||
250 | } | ||
251 | } | ||
252 | |||
253 | static int OneHotDecode(const nnMatrix* label_matrix) { | ||
254 | assert(label_matrix); | ||
255 | assert(label_matrix->cols == 1); | ||
256 | assert(label_matrix->rows == 10); | ||
257 | |||
258 | R max_value = 0; | ||
259 | int pos_max = 0; | ||
260 | for (int i = 0; i < 10; ++i) { | ||
261 | const R value = nnMatrixAt(label_matrix, 0, i); | ||
262 | if (value > max_value) { | ||
263 | max_value = value; | ||
264 | pos_max = i; | ||
265 | } | ||
266 | } | ||
267 | assert(pos_max >= 0); | ||
268 | assert(pos_max <= 10); | ||
269 | return pos_max; | ||
270 | } | ||
271 | |||
272 | static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_set) { | ||
273 | assert(labels_file != Z_NULL); | ||
274 | assert(image_set != 0); | ||
275 | |||
276 | bool success = false; | ||
277 | |||
278 | uint8_t* labels = 0; | ||
279 | |||
280 | int32_t magic, total_labels; | ||
281 | if ( (gzread(labels_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) || | ||
282 | (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != sizeof(int32_t)) ) { | ||
283 | fprintf(stderr, "Failed to read header\n"); | ||
284 | goto cleanup; | ||
285 | } | ||
286 | |||
287 | magic = ReverseEndian32(magic); | ||
288 | total_labels = ReverseEndian32(total_labels); | ||
289 | |||
290 | if (magic != LABEL_FILE_MAGIC) { | ||
291 | fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n", | ||
292 | magic, LABEL_FILE_MAGIC); | ||
293 | goto cleanup; | ||
294 | } | ||
295 | |||
296 | printf("Magic: %.8x\nTotal labels: %d\n", magic, total_labels); | ||
297 | |||
298 | total_labels = max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels; | ||
299 | |||
300 | assert(image_set->count == total_labels); | ||
301 | |||
302 | // One-hot encoding of labels, 10 values (digits) per label. | ||
303 | image_set->labels = nnMatrixMake(total_labels, 10); | ||
304 | |||
305 | labels = calloc(total_labels, sizeof(uint8_t)); | ||
306 | if (!labels) { | ||
307 | fprintf(stderr, "Failed to allocate labels buffer\n"); | ||
308 | goto cleanup; | ||
309 | } | ||
310 | |||
311 | if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != total_labels) { | ||
312 | fprintf(stderr, "Failed to read labels\n"); | ||
313 | goto cleanup; | ||
314 | } | ||
315 | |||
316 | OneHotEncode(labels, total_labels, &image_set->labels); | ||
317 | |||
318 | success = true; | ||
319 | |||
320 | cleanup: | ||
321 | if (labels) { | ||
322 | free(labels); | ||
323 | } | ||
324 | if (!success) { | ||
325 | nnMatrixDel(&image_set->labels); | ||
326 | } | ||
327 | return success; | ||
328 | } | ||
329 | |||
330 | int main(int argc, const char** argv) { | ||
331 | if (argc < 2) { | ||
332 | usage(argv[0]); | ||
333 | return 1; | ||
334 | } | ||
335 | |||
336 | bool success = false; | ||
337 | |||
338 | gzFile train_images_file = Z_NULL; | ||
339 | gzFile train_labels_file = Z_NULL; | ||
340 | gzFile test_images_file = Z_NULL; | ||
341 | gzFile test_labels_file = Z_NULL; | ||
342 | ImageSet train_set = { 0 }; | ||
343 | ImageSet test_set = { 0 }; | ||
344 | nnNeuralNetwork* net = 0; | ||
345 | nnQueryObject* query = 0; | ||
346 | |||
347 | const char* mnist_files_dir = argv[1]; | ||
348 | const int max_num_images = argc > 2 ? atoi(argv[2]) : -1; | ||
349 | |||
350 | char train_labels_path[PATH_MAX]; | ||
351 | char train_images_path[PATH_MAX]; | ||
352 | char test_labels_path[PATH_MAX]; | ||
353 | char test_images_path[PATH_MAX]; | ||
354 | strlcpy(train_labels_path, mnist_files_dir, PATH_MAX); | ||
355 | strlcpy(train_images_path, mnist_files_dir, PATH_MAX); | ||
356 | strlcpy(test_labels_path, mnist_files_dir, PATH_MAX); | ||
357 | strlcpy(test_images_path, mnist_files_dir, PATH_MAX); | ||
358 | strlcat(train_labels_path, "/train-labels-idx1-ubyte.gz", PATH_MAX); | ||
359 | strlcat(train_images_path, "/train-images-idx3-ubyte.gz", PATH_MAX); | ||
360 | strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX); | ||
361 | strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX); | ||
362 | |||
363 | train_images_file = gzopen(train_images_path, "r"); | ||
364 | if (train_images_file == Z_NULL) { | ||
365 | fprintf(stderr, "Failed to open file: %s\n", train_images_path); | ||
366 | goto cleanup; | ||
367 | } | ||
368 | |||
369 | train_labels_file = gzopen(train_labels_path, "r"); | ||
370 | if (train_labels_file == Z_NULL) { | ||
371 | fprintf(stderr, "Failed to open file: %s\n", train_labels_path); | ||
372 | goto cleanup; | ||
373 | } | ||
374 | |||
375 | test_images_file = gzopen(test_images_path, "r"); | ||
376 | if (test_images_file == Z_NULL) { | ||
377 | fprintf(stderr, "Failed to open file: %s\n", test_images_path); | ||
378 | goto cleanup; | ||
379 | } | ||
380 | |||
381 | test_labels_file = gzopen(test_labels_path, "r"); | ||
382 | if (test_labels_file == Z_NULL) { | ||
383 | fprintf(stderr, "Failed to open file: %s\n", test_labels_path); | ||
384 | goto cleanup; | ||
385 | } | ||
386 | |||
387 | if (!ReadImages(train_images_file, max_num_images, &train_set)) { | ||
388 | goto cleanup; | ||
389 | } | ||
390 | if (!ReadLabels(train_labels_file, max_num_images, &train_set)) { | ||
391 | goto cleanup; | ||
392 | } | ||
393 | |||
394 | if (!ReadImages(test_images_file, max_num_images, &test_set)) { | ||
395 | goto cleanup; | ||
396 | } | ||
397 | if (!ReadLabels(test_labels_file, max_num_images, &test_set)) { | ||
398 | goto cleanup; | ||
399 | } | ||
400 | |||
401 | printf("\nTraining image/label pair examples:\n"); | ||
402 | for (int i = 0; i < min(3, train_set.images.rows); ++i) { | ||
403 | PrintImage(&train_set.images, train_set.rows, train_set.cols, i); | ||
404 | PrintLabel(&train_set.labels, i); | ||
405 | printf("\n"); | ||
406 | } | ||
407 | |||
408 | // Network definition. | ||
409 | const int image_size_pixels = train_set.rows * train_set.cols; | ||
410 | const int num_layers = 2; | ||
411 | const int layer_sizes[3] = { image_size_pixels, 100, 10 }; | ||
412 | const nnActivation layer_activations[2] = { nnSigmoid, nnSigmoid }; | ||
413 | if (!(net = nnMakeNet(num_layers, layer_sizes, layer_activations))) { | ||
414 | fprintf(stderr, "Failed to create neural network\n"); | ||
415 | goto cleanup; | ||
416 | } | ||
417 | |||
418 | // Train. | ||
419 | printf("Training with up to %d images from the data set\n\n", max_num_images); | ||
420 | const nnTrainingParams training_params = { | ||
421 | .learning_rate = 0.1, | ||
422 | .max_iterations = TRAIN_ITERATIONS, | ||
423 | .seed = 0, | ||
424 | .weight_init = nnWeightInitNormal, | ||
425 | .debug = true, | ||
426 | }; | ||
427 | nnTrain(net, &train_set.images, &train_set.labels, &training_params); | ||
428 | |||
429 | // Test. | ||
430 | int hits = 0; | ||
431 | query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
432 | for (int i = 0; i < test_set.count; ++i) { | ||
433 | const nnMatrix test_image = nnMatrixBorrowRows(&test_set.images, i, 1); | ||
434 | const nnMatrix test_label = nnMatrixBorrowRows(&test_set.labels, i, 1); | ||
435 | |||
436 | nnQuery(net, query, &test_image); | ||
437 | |||
438 | const int test_label_expected = OneHotDecode(&test_label); | ||
439 | const int test_label_actual = OneHotDecode(nnNetOutputs(query)); | ||
440 | |||
441 | if (test_label_actual == test_label_expected) { | ||
442 | ++hits; | ||
443 | } | ||
444 | } | ||
445 | const R hit_ratio = (R)hits / (R)test_set.count; | ||
446 | printf("Test images: %d\n", test_set.count); | ||
447 | printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio*100); | ||
448 | |||
449 | success = true; | ||
450 | |||
451 | cleanup: | ||
452 | if (query) { | ||
453 | nnDeleteQueryObject(&query); | ||
454 | } | ||
455 | if (net) { | ||
456 | nnDeleteNet(&net); | ||
457 | } | ||
458 | nnMatrixDel(&train_set.images); | ||
459 | nnMatrixDel(&test_set.images); | ||
460 | if (train_images_file != Z_NULL) { | ||
461 | gzclose(train_images_file); | ||
462 | } | ||
463 | if (train_labels_file != Z_NULL) { | ||
464 | gzclose(train_labels_file); | ||
465 | } | ||
466 | if (test_images_file != Z_NULL) { | ||
467 | gzclose(test_images_file); | ||
468 | } | ||
469 | if (test_labels_file != Z_NULL) { | ||
470 | gzclose(test_labels_file); | ||
471 | } | ||
472 | return success ? 0 : 1; | ||
473 | } | ||
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt new file mode 100644 index 0000000..9e0e924 --- /dev/null +++ b/src/lib/CMakeLists.txt | |||
@@ -0,0 +1,37 @@ | |||
1 | cmake_minimum_required(VERSION 3.0) | ||
2 | |||
3 | # Library | ||
4 | |||
5 | add_library(neuralnet | ||
6 | src/matrix.c | ||
7 | src/neuralnet.c | ||
8 | src/train.c) | ||
9 | |||
10 | target_include_directories(neuralnet PUBLIC | ||
11 | include) | ||
12 | |||
13 | target_link_libraries(neuralnet PRIVATE | ||
14 | math # System math library. | ||
15 | random) | ||
16 | |||
17 | target_compile_options(neuralnet PRIVATE -Wall -Wextra) | ||
18 | |||
19 | # Test | ||
20 | |||
21 | add_executable(neuralnet-test | ||
22 | test/matrix_test.c | ||
23 | test/neuralnet_test.c | ||
24 | test/test_main.c | ||
25 | test/train_linear_perceptron_test.c | ||
26 | test/train_linear_perceptron_non_origin_test.c | ||
27 | test/train_sigmoid_test.c | ||
28 | test/train_xor_test.c) | ||
29 | |||
30 | target_link_libraries(neuralnet-test PRIVATE | ||
31 | neuralnet) | ||
32 | |||
33 | # So that we can include header files from the private implementation. | ||
34 | target_include_directories(neuralnet-test PRIVATE | ||
35 | src) | ||
36 | |||
37 | target_compile_options(neuralnet-test PRIVATE -DUNIT_TEST -Wall -Wextra) | ||
diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h new file mode 100644 index 0000000..9816b81 --- /dev/null +++ b/src/lib/include/neuralnet/matrix.h | |||
@@ -0,0 +1,111 @@ | |||
1 | #pragma once | ||
2 | |||
3 | #include <neuralnet/types.h> | ||
4 | |||
5 | #include <assert.h> | ||
6 | |||
7 | /// NxM matrix. | ||
8 | typedef struct nnMatrix { | ||
9 | int rows; | ||
10 | int cols; | ||
11 | R* values; | ||
12 | } nnMatrix; | ||
13 | |||
14 | /// Construct a matrix. | ||
15 | nnMatrix nnMatrixMake(int rows, int cols); | ||
16 | |||
17 | /// Delete a matrix and free its internal memory. | ||
18 | void nnMatrixDel(nnMatrix*); | ||
19 | |||
20 | /// Move a matrix. | ||
21 | /// | ||
22 | /// |in| is an empty matrix after the move. | ||
23 | /// |out| is a matrix like |in| before the move. | ||
24 | void nnMatrixMove(nnMatrix* in, nnMatrix* out); | ||
25 | |||
26 | /// Deep-copy a matrix. | ||
27 | void nnMatrixCopy(const nnMatrix* in, nnMatrix* out); | ||
28 | |||
29 | /// Write the matrix values into an array in a row-major fashion. | ||
30 | void nnMatrixToArray(const nnMatrix* in, R* out); | ||
31 | |||
32 | /// Write the given row of a matrix into an array. | ||
33 | void nnMatrixRowToArray(const nnMatrix* in, int row, R* out); | ||
34 | |||
35 | /// Copy a column from a source to a target matrix. | ||
36 | void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out); | ||
37 | |||
38 | /// Mutable borrow of a matrix. | ||
39 | nnMatrix nnMatrixBorrow(nnMatrix* in); | ||
40 | |||
41 | /// Mutable borrow of a subrange of rows of a matrix. | ||
42 | nnMatrix nnMatrixBorrowRows(nnMatrix* in, int row_start, int num_rows); | ||
43 | |||
44 | /// Initialize the matrix from an array of values. | ||
45 | /// | ||
46 | /// The array must hold values in a row-major fashion. | ||
47 | void nnMatrixInit(nnMatrix*, const R* values); | ||
48 | |||
49 | /// Initialize all matrix values to a given constant. | ||
50 | void nnMatrixInitConstant(nnMatrix*, R value); | ||
51 | |||
52 | /// Multiply two matrices. | ||
53 | void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out); | ||
54 | |||
55 | /// Matrix multiply-add. | ||
56 | /// | ||
57 | /// out = left + (right * scale) | ||
58 | void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out); | ||
59 | |||
60 | /// Matrix multiply-subtract. | ||
61 | /// | ||
62 | /// out = left - (right * scale) | ||
63 | void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out); | ||
64 | |||
65 | /// Hadamard product of two matrices. | ||
66 | void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out); | ||
67 | |||
68 | /// Add two matrices. | ||
69 | void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out); | ||
70 | |||
71 | /// Subtract two matrices. | ||
72 | void nnMatrixSub(const nnMatrix* left, const nnMatrix* right, nnMatrix* out); | ||
73 | |||
74 | /// Adds a row vector to all rows of the matrix. | ||
75 | void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out); | ||
76 | |||
77 | /// Scale a matrix. | ||
78 | void nnMatrixScale(nnMatrix*, R scale); | ||
79 | |||
80 | /// Transpose a matrix. | ||
81 | /// |in| must be different than |out|. | ||
82 | void nnMatrixTranspose(const nnMatrix* in, nnMatrix* out); | ||
83 | |||
84 | /// Threshold the values of a matrix using a greater-than operator. | ||
85 | /// | ||
86 | /// out[x,y] = 1 if in[x,y] > threshold else 0 | ||
87 | void nnMatrixGt(const nnMatrix* in, R threshold, nnMatrix* out); | ||
88 | |||
89 | /// Return the matrix value at the given row and column. | ||
90 | static inline R nnMatrixAt(const nnMatrix* matrix, int row, int col) { | ||
91 | assert(matrix); | ||
92 | return matrix->values[row * matrix->cols + col]; | ||
93 | } | ||
94 | |||
95 | /// Set the matrix value at the given row and column. | ||
96 | static inline void nnMatrixSet(nnMatrix* matrix, int row, int col, R value) { | ||
97 | assert(matrix); | ||
98 | matrix->values[row * matrix->cols + col] = value; | ||
99 | } | ||
100 | |||
101 | /// Return a pointer to the given row in the matrix. | ||
102 | static inline const R* nnMatrixRow(const nnMatrix* matrix, int row) { | ||
103 | assert(matrix); | ||
104 | return &matrix->values[row * matrix->cols]; | ||
105 | } | ||
106 | |||
107 | /// Return a mutable pointer to the given row in the matrix. | ||
108 | static inline R* nnMatrixRow_mut(nnMatrix* matrix, int row) { | ||
109 | assert(matrix); | ||
110 | return &matrix->values[row * matrix->cols]; | ||
111 | } | ||
diff --git a/src/lib/include/neuralnet/neuralnet.h b/src/lib/include/neuralnet/neuralnet.h new file mode 100644 index 0000000..1cf1c53 --- /dev/null +++ b/src/lib/include/neuralnet/neuralnet.h | |||
@@ -0,0 +1,64 @@ | |||
1 | #pragma once | ||
2 | |||
3 | #include <neuralnet/types.h> | ||
4 | |||
5 | typedef struct nnMatrix nnMatrix; | ||
6 | |||
7 | typedef struct nnNeuralNetwork nnNeuralNetwork; | ||
8 | typedef struct nnQueryObject nnQueryObject; | ||
9 | |||
/// Activation function applied by the neurons of a layer.
enum nnActivation {
  nnIdentity,
  nnSigmoid,
  nnRelu,
};
typedef enum nnActivation nnActivation;
16 | |||
17 | /// Create a network. | ||
18 | nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations); | ||
19 | |||
20 | /// Delete the network and free its internal memory. | ||
21 | void nnDeleteNet(nnNeuralNetwork**); | ||
22 | |||
23 | /// Set the network's weights. | ||
24 | void nnSetWeights(nnNeuralNetwork*, const R* weights); | ||
25 | |||
26 | /// Set the network's biases. | ||
27 | void nnSetBiases(nnNeuralNetwork*, const R* biases); | ||
28 | |||
29 | /// Query the network. | ||
30 | /// | ||
31 | /// |input| is a matrix of inputs, one row per input and as many columns as the | ||
32 | /// input's dimension. | ||
33 | /// | ||
34 | /// The query object's output matrix (see nnNetOutputs()) is a matrix of | ||
35 | /// outputs, one row per output and as many columns as the output's dimension. | ||
36 | void nnQuery(const nnNeuralNetwork*, nnQueryObject*, const nnMatrix* input); | ||
37 | |||
38 | /// Query the network, array version. | ||
39 | void nnQueryArray(const nnNeuralNetwork*, nnQueryObject*, const R* input, R* output); | ||
40 | |||
41 | /// Create a query object. | ||
42 | /// | ||
43 | /// The query object holds all the internal memory required to query a network. | ||
44 | /// Query objects allocate all memory up front so that network queries can run | ||
45 | /// without additional memory allocation. | ||
46 | nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int num_inputs); | ||
47 | |||
48 | /// Delete the query object and free its internal memory. | ||
49 | void nnDeleteQueryObject(nnQueryObject**); | ||
50 | |||
51 | /// Return the outputs of the query. | ||
52 | const nnMatrix* nnNetOutputs(const nnQueryObject*); | ||
53 | |||
54 | /// Return the network's input size. | ||
55 | int nnNetInputSize(const nnNeuralNetwork*); | ||
56 | |||
57 | /// Return the network's output size. | ||
58 | int nnNetOutputSize(const nnNeuralNetwork*); | ||
59 | |||
60 | /// Return the layer's input size. | ||
61 | int nnLayerInputSize(const nnMatrix* weights); | ||
62 | |||
63 | /// Return the layer's output size. | ||
64 | int nnLayerOutputSize(const nnMatrix* weights); | ||
diff --git a/src/lib/include/neuralnet/train.h b/src/lib/include/neuralnet/train.h new file mode 100644 index 0000000..79f8e7b --- /dev/null +++ b/src/lib/include/neuralnet/train.h | |||
@@ -0,0 +1,42 @@ | |||
1 | #pragma once | ||
2 | |||
3 | #include <neuralnet/neuralnet.h> | ||
4 | |||
5 | #include <stdbool.h> | ||
6 | #include <stdint.h> | ||
7 | |||
8 | typedef struct nnMatrix nnMatrix; | ||
9 | |||
/// Strategy used to initialize the network's weights.
///
/// Whatever the strategy, a layer's weights are scaled by the layer's size.
/// This avoids saturation when, e.g., a sigmoid activation has many inputs.
/// A (0,1) initialization is therefore really (0,scale), for example.
enum nnWeightInitStrategy {
  /// Values in the (0,1) range.
  nnWeightInit01,
  /// Values in the (-1,+1) range.
  nnWeightInit11,
  /// Values drawn from a normal distribution.
  nnWeightInitNormal,
};
typedef enum nnWeightInitStrategy nnWeightInitStrategy;
21 | |||
22 | /// Network training parameters. | ||
23 | typedef struct nnTrainingParams { | ||
24 | R learning_rate; | ||
25 | int max_iterations; | ||
26 | uint64_t seed; | ||
27 | nnWeightInitStrategy weight_init; | ||
28 | bool debug; | ||
29 | } nnTrainingParams; | ||
30 | |||
31 | /// Train the network. | ||
32 | /// | ||
33 | /// |inputs| is a matrix of inputs, one row per input and as many columns as | ||
34 | /// the input's dimension. | ||
35 | /// | ||
36 | /// |targets| is a matrix of targets, one row per target and as many columns as | ||
37 | /// the target's dimension. | ||
38 | void nnTrain( | ||
39 | nnNeuralNetwork*, | ||
40 | const nnMatrix* inputs, | ||
41 | const nnMatrix* targets, | ||
42 | const nnTrainingParams*); | ||
diff --git a/src/lib/include/neuralnet/types.h b/src/lib/include/neuralnet/types.h new file mode 100644 index 0000000..e8d3942 --- /dev/null +++ b/src/lib/include/neuralnet/types.h | |||
@@ -0,0 +1,3 @@ | |||
1 | #pragma once | ||
2 | |||
3 | typedef double R; | ||
diff --git a/src/lib/src/activation.h b/src/lib/src/activation.h new file mode 100644 index 0000000..42ab73f --- /dev/null +++ b/src/lib/src/activation.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #pragma once | ||
2 | |||
3 | #include <neuralnet/types.h> | ||
4 | |||
5 | #include <math.h> | ||
6 | |||
7 | static inline R sigmoid(R x) { | ||
8 | return 1. / (1. + exp(-x)); | ||
9 | } | ||
10 | |||
11 | static inline R relu(R x) { | ||
12 | return fmax(0, x); | ||
13 | } | ||
14 | |||
/// Apply |f| to each of the first |size| elements of |in| and store the
/// results in the matching elements of |out|.
///
/// |in|, |out| and |size| are parenthesized so that expression arguments such
/// as `values + offset` or `a ? b : c` expand as intended. The arguments must
/// not refer to a variable named `i`, which the macro uses as its loop index.
#define NN_MAP_ARRAY(f, in, out, size) \
  for (int i = 0; i < (size); ++i) {   \
    (out)[i] = f((in)[i]);             \
  }

/// Apply sigmoid() / relu() element-wise to the first |size| elements of |in|.
#define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size)
#define relu_array(in, out, size) NN_MAP_ARRAY(relu, in, out, size)
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c new file mode 100644 index 0000000..a7a4ce6 --- /dev/null +++ b/src/lib/src/matrix.c | |||
@@ -0,0 +1,298 @@ | |||
1 | #include <neuralnet/matrix.h> | ||
2 | |||
3 | #include <assert.h> | ||
4 | #include <stdlib.h> | ||
5 | #include <string.h> | ||
6 | |||
7 | nnMatrix nnMatrixMake(int rows, int cols) { | ||
8 | R* values = calloc(rows * cols, sizeof(R)); | ||
9 | assert(values != 0); | ||
10 | |||
11 | return (nnMatrix) { | ||
12 | .rows = rows, | ||
13 | .cols = cols, | ||
14 | .values = values, | ||
15 | }; | ||
16 | } | ||
17 | |||
18 | void nnMatrixDel(nnMatrix* matrix) { | ||
19 | assert(matrix != 0); | ||
20 | |||
21 | if (matrix->values != 0) { | ||
22 | free(matrix->values); | ||
23 | matrix->values = 0; | ||
24 | matrix->rows = 0; | ||
25 | matrix->cols = 0; | ||
26 | } | ||
27 | } | ||
28 | |||
29 | void nnMatrixMove(nnMatrix* in, nnMatrix* out) { | ||
30 | assert(in); | ||
31 | assert(out); | ||
32 | |||
33 | out->rows = in->rows; | ||
34 | out->cols = in->cols; | ||
35 | out->values = in->values; | ||
36 | |||
37 | in->rows = 0; | ||
38 | in->cols = 0; | ||
39 | in->values = 0; | ||
40 | } | ||
41 | |||
42 | void nnMatrixCopy(const nnMatrix* in, nnMatrix* out) { | ||
43 | assert(in); | ||
44 | assert(out); | ||
45 | assert(in->rows == out->rows); | ||
46 | assert(in->cols == out->cols); | ||
47 | |||
48 | const R* in_value = in->values; | ||
49 | R* out_value = out->values; | ||
50 | |||
51 | for (int i = 0; i < in->rows * in->cols; ++i) { | ||
52 | *out_value++ = *in_value++; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | void nnMatrixToArray(const nnMatrix* in, R* out) { | ||
57 | assert(in); | ||
58 | assert(out); | ||
59 | |||
60 | const R* values = in->values; | ||
61 | for (int i = 0; i < in->rows * in->cols; ++i) { | ||
62 | *out++ = *values++; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | void nnMatrixRowToArray(const nnMatrix* in, int row, R* out) { | ||
67 | assert(in); | ||
68 | assert(out); | ||
69 | |||
70 | const R* values = in->values + row * in->cols; | ||
71 | for (int i = 0; i < in->cols; ++i) { | ||
72 | *out++ = *values++; | ||
73 | } | ||
74 | } | ||
75 | |||
76 | void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out) { | ||
77 | assert(in); | ||
78 | assert(out); | ||
79 | assert(in->rows == out->rows); | ||
80 | assert(col_in < in->cols); | ||
81 | assert(col_out < out->cols); | ||
82 | |||
83 | for (int row = 0; row < in->rows; ++row) { | ||
84 | nnMatrixSet(out, row, col_out, nnMatrixAt(in, row, col_in)); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | nnMatrix nnMatrixBorrow(nnMatrix* in) { | ||
89 | assert(in); | ||
90 | |||
91 | nnMatrix out; | ||
92 | out.rows = in->rows; | ||
93 | out.cols = in->cols; | ||
94 | out.values = in->values; | ||
95 | return out; | ||
96 | } | ||
97 | |||
98 | nnMatrix nnMatrixBorrowRows(nnMatrix* in, int row_start, int num_rows) { | ||
99 | assert(in); | ||
100 | assert(row_start < in->rows); | ||
101 | assert(row_start + num_rows <= in->rows); | ||
102 | |||
103 | nnMatrix out; | ||
104 | out.rows = num_rows; | ||
105 | out.cols = in->cols; | ||
106 | out.values = nnMatrixRow_mut(in, row_start); | ||
107 | return out; | ||
108 | } | ||
109 | |||
110 | void nnMatrixInit(nnMatrix* matrix, const R* values) { | ||
111 | assert(matrix); | ||
112 | assert(values); | ||
113 | memcpy(matrix->values, values, matrix->rows * matrix->cols * sizeof(R)); | ||
114 | } | ||
115 | |||
116 | void nnMatrixInitConstant(nnMatrix* matrix, R value) { | ||
117 | assert(matrix); | ||
118 | for (int i = 0; i < matrix->rows * matrix->cols; ++i) { | ||
119 | matrix->values[i] = value; | ||
120 | } | ||
121 | } | ||
122 | |||
123 | void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { | ||
124 | assert(left != 0); | ||
125 | assert(right != 0); | ||
126 | assert(out != 0); | ||
127 | assert(out != left); | ||
128 | assert(out != right); | ||
129 | assert(left->cols == right->rows); | ||
130 | assert(out->rows == left->rows); | ||
131 | assert(out->cols == right->cols); | ||
132 | |||
133 | R* out_value = out->values; | ||
134 | |||
135 | for (int i = 0; i < left->rows; ++i) { | ||
136 | const R* left_row = &left->values[i * left->cols]; | ||
137 | |||
138 | for (int j = 0; j < right->cols; ++j) { | ||
139 | const R* right_col = &right->values[j]; | ||
140 | *out_value = 0; | ||
141 | |||
142 | // Vector dot product. | ||
143 | for (int k = 0; k < left->cols; ++k) { | ||
144 | *out_value += left_row[k] * right_col[0]; | ||
145 | right_col += right->cols; // Next row in the column. | ||
146 | } | ||
147 | |||
148 | out_value++; | ||
149 | } | ||
150 | } | ||
151 | } | ||
152 | |||
153 | void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) { | ||
154 | assert(left); | ||
155 | assert(right); | ||
156 | assert(out); | ||
157 | assert(left->rows == right->rows); | ||
158 | assert(left->cols == right->cols); | ||
159 | assert(left->rows == out->rows); | ||
160 | assert(left->cols == out->cols); | ||
161 | |||
162 | const R* left_value = left->values; | ||
163 | const R* right_value = right->values; | ||
164 | R* out_value = out->values; | ||
165 | |||
166 | for (int i = 0; i < left->rows * left->cols; ++i) { | ||
167 | *out_value++ = *left_value++ + *right_value++ * scale; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) { | ||
172 | assert(left); | ||
173 | assert(right); | ||
174 | assert(out); | ||
175 | assert(left->rows == right->rows); | ||
176 | assert(left->cols == right->cols); | ||
177 | assert(left->rows == out->rows); | ||
178 | assert(left->cols == out->cols); | ||
179 | |||
180 | const R* left_value = left->values; | ||
181 | const R* right_value = right->values; | ||
182 | R* out_value = out->values; | ||
183 | |||
184 | for (int i = 0; i < left->rows * left->cols; ++i) { | ||
185 | *out_value++ = *left_value++ - *right_value++ * scale; | ||
186 | } | ||
187 | } | ||
188 | |||
189 | void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { | ||
190 | assert(left != 0); | ||
191 | assert(right != 0); | ||
192 | assert(out != 0); | ||
193 | assert(left->rows == right->rows); | ||
194 | assert(left->cols == right->cols); | ||
195 | assert(left->rows == out->rows); | ||
196 | assert(left->cols == out->cols); | ||
197 | |||
198 | R* left_value = left->values; | ||
199 | R* right_value = right->values; | ||
200 | R* out_value = out->values; | ||
201 | |||
202 | for (int i = 0; i < left->rows * left->cols; ++i) { | ||
203 | *out_value++ = *left_value++ * *right_value++; | ||
204 | } | ||
205 | } | ||
206 | |||
207 | void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { | ||
208 | assert(left); | ||
209 | assert(right); | ||
210 | assert(out); | ||
211 | assert(left->rows == right->rows); | ||
212 | assert(left->cols == right->cols); | ||
213 | assert(left->rows == out->rows); | ||
214 | assert(left->cols == out->cols); | ||
215 | |||
216 | const R* left_value = left->values; | ||
217 | const R* right_value = right->values; | ||
218 | R* out_value = out->values; | ||
219 | |||
220 | for (int i = 0; i < left->rows * left->cols; ++i) { | ||
221 | *out_value++ = *left_value++ + *right_value++; | ||
222 | } | ||
223 | } | ||
224 | |||
225 | void nnMatrixSub(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) { | ||
226 | assert(left); | ||
227 | assert(right); | ||
228 | assert(out); | ||
229 | assert(left->rows == right->rows); | ||
230 | assert(left->cols == right->cols); | ||
231 | assert(left->rows == out->rows); | ||
232 | assert(left->cols == out->cols); | ||
233 | |||
234 | const R* left_value = left->values; | ||
235 | const R* right_value = right->values; | ||
236 | R* out_value = out->values; | ||
237 | |||
238 | for (int i = 0; i < left->rows * left->cols; ++i) { | ||
239 | *out_value++ = *left_value++ - *right_value++; | ||
240 | } | ||
241 | } | ||
242 | |||
243 | void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out) { | ||
244 | assert(matrix); | ||
245 | assert(row); | ||
246 | assert(out); | ||
247 | assert(row->rows == 1); | ||
248 | assert(matrix->cols == row->cols); | ||
249 | assert(matrix->rows == out->rows); | ||
250 | assert(matrix->cols == out->cols); | ||
251 | |||
252 | const R* matrix_value = matrix->values; | ||
253 | R* out_value = out->values; | ||
254 | |||
255 | for (int i = 0; i < matrix->rows; ++i) { | ||
256 | const R* row_value = row->values; | ||
257 | for (int j = 0; j < row->cols; ++j) { | ||
258 | *out_value++ = *matrix_value++ + *row_value++; | ||
259 | } | ||
260 | } | ||
261 | } | ||
262 | |||
263 | void nnMatrixScale(nnMatrix* matrix, R scale) { | ||
264 | assert(matrix); | ||
265 | |||
266 | R* value = matrix->values; | ||
267 | for (int i = 0; i < matrix->rows * matrix->cols; ++i) { | ||
268 | *value++ *= scale; | ||
269 | } | ||
270 | } | ||
271 | |||
272 | void nnMatrixTranspose(const nnMatrix* in, nnMatrix* out) { | ||
273 | assert(in); | ||
274 | assert(out); | ||
275 | assert(in != out); | ||
276 | assert(in->rows == out->cols); | ||
277 | assert(in->cols == out->rows); | ||
278 | |||
279 | for (int i = 0; i < in->rows; ++i) { | ||
280 | for (int j = 0; j < in->cols; ++j) { | ||
281 | nnMatrixSet(out, j, i, nnMatrixAt(in, i, j)); | ||
282 | } | ||
283 | } | ||
284 | } | ||
285 | |||
286 | void nnMatrixGt(const nnMatrix* in, R threshold, nnMatrix* out) { | ||
287 | assert(in); | ||
288 | assert(out); | ||
289 | assert(in->rows == out->rows); | ||
290 | assert(in->cols == out->cols); | ||
291 | |||
292 | const R* in_value = in->values; | ||
293 | R* out_value = out->values; | ||
294 | |||
295 | for (int i = 0; i < in->rows * in->cols; ++i) { | ||
296 | *out_value++ = (*in_value++) > threshold ? 1 : 0; | ||
297 | } | ||
298 | } | ||
diff --git a/src/lib/src/neuralnet.c b/src/lib/src/neuralnet.c new file mode 100644 index 0000000..cac611a --- /dev/null +++ b/src/lib/src/neuralnet.c | |||
@@ -0,0 +1,228 @@ | |||
1 | #include <neuralnet/neuralnet.h> | ||
2 | |||
3 | #include <neuralnet/matrix.h> | ||
4 | #include "activation.h" | ||
5 | #include "neuralnet_impl.h" | ||
6 | |||
7 | #include <assert.h> | ||
8 | #include <stdlib.h> | ||
9 | |||
10 | nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations) { | ||
11 | assert(num_layers > 0); | ||
12 | assert(layer_sizes); | ||
13 | assert(activations); | ||
14 | |||
15 | nnNeuralNetwork* net = calloc(1, sizeof(nnNeuralNetwork)); | ||
16 | if (net == 0) { | ||
17 | return 0; | ||
18 | } | ||
19 | |||
20 | net->num_layers = num_layers; | ||
21 | |||
22 | net->weights = calloc(num_layers, sizeof(nnMatrix)); | ||
23 | net->biases = calloc(num_layers, sizeof(nnMatrix)); | ||
24 | net->activations = calloc(num_layers, sizeof(nnActivation)); | ||
25 | if ( (net->weights == 0) || (net->biases == 0) || (net->activations == 0) ) { | ||
26 | nnDeleteNet(&net); | ||
27 | return 0; | ||
28 | } | ||
29 | |||
30 | for (int l = 0; l < num_layers; ++l) { | ||
31 | // layer_sizes = { input layer size, first hidden layer size, ...} | ||
32 | const int layer_input_size = layer_sizes[l]; | ||
33 | const int layer_output_size = layer_sizes[l+1]; | ||
34 | |||
35 | // We store the transpose of the weight matrix as written in textbooks. | ||
36 | // Our vectors are row vectors and the matrices row-major. | ||
37 | const int rows = layer_input_size; | ||
38 | const int cols = layer_output_size; | ||
39 | |||
40 | net->weights[l] = nnMatrixMake(rows, cols); | ||
41 | net->biases[l] = nnMatrixMake(1, cols); | ||
42 | net->activations[l] = activations[l]; | ||
43 | } | ||
44 | |||
45 | return net; | ||
46 | } | ||
47 | |||
48 | void nnDeleteNet(nnNeuralNetwork** net) { | ||
49 | if ( (!net) || (!(*net)) ) { | ||
50 | return; | ||
51 | } | ||
52 | if ((*net)->weights != 0) { | ||
53 | for (int l = 0; l < (*net)->num_layers; ++l) { | ||
54 | nnMatrixDel(&(*net)->weights[l]); | ||
55 | } | ||
56 | free((*net)->weights); | ||
57 | (*net)->weights = 0; | ||
58 | } | ||
59 | if ((*net)->biases != 0) { | ||
60 | for (int l = 0; l < (*net)->num_layers; ++l) { | ||
61 | nnMatrixDel(&(*net)->biases[l]); | ||
62 | } | ||
63 | free((*net)->biases); | ||
64 | (*net)->biases = 0; | ||
65 | } | ||
66 | if ((*net)->activations) { | ||
67 | free((*net)->activations); | ||
68 | (*net)->activations = 0; | ||
69 | } | ||
70 | free(*net); | ||
71 | *net = 0; | ||
72 | } | ||
73 | |||
74 | void nnSetWeights(nnNeuralNetwork* net, const R* weights) { | ||
75 | assert(net); | ||
76 | assert(weights); | ||
77 | |||
78 | for (int l = 0; l < net->num_layers; ++l) { | ||
79 | nnMatrix* layer_weights = &net->weights[l]; | ||
80 | R* layer_values = layer_weights->values; | ||
81 | |||
82 | for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) { | ||
83 | *layer_values++ = *weights++; | ||
84 | } | ||
85 | } | ||
86 | } | ||
87 | |||
88 | void nnSetBiases(nnNeuralNetwork* net, const R* biases) { | ||
89 | assert(net); | ||
90 | assert(biases); | ||
91 | |||
92 | for (int l = 0; l < net->num_layers; ++l) { | ||
93 | nnMatrix* layer_biases = &net->biases[l]; | ||
94 | R* layer_values = layer_biases->values; | ||
95 | |||
96 | for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) { | ||
97 | *layer_values++ = *biases++; | ||
98 | } | ||
99 | } | ||
100 | } | ||
101 | |||
102 | void nnQuery(const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* input) { | ||
103 | assert(net); | ||
104 | assert(query); | ||
105 | assert(input); | ||
106 | assert(net->num_layers == query->num_layers); | ||
107 | assert(input->rows <= query->network_outputs->rows); | ||
108 | assert(input->cols == nnNetInputSize(net)); | ||
109 | |||
110 | for (int i = 0; i < input->rows; ++i) { | ||
111 | // Not mutating the input, but we need the cast to borrow. | ||
112 | nnMatrix input_vector = nnMatrixBorrowRows((nnMatrix*)input, i, 1); | ||
113 | |||
114 | for (int l = 0; l < net->num_layers; ++l) { | ||
115 | const nnMatrix* layer_weights = &net->weights[l]; | ||
116 | const nnMatrix* layer_biases = &net->biases[l]; | ||
117 | // Y^T = (W*X)^T = X^T*W^T | ||
118 | // | ||
119 | // TODO: If we had a row-row matrix multiplication, we could compute: | ||
120 | // Y^T = W ** X^T | ||
121 | // The row-row multiplication could be more cache-friendly. We just need | ||
122 | // to store W as is, without transposing. | ||
123 | // We could also rewrite the original Mul function to go row x row, | ||
124 | // decomposing the multiplication. Preserving the original meaning of Mul | ||
125 | // makes everything clearer. | ||
126 | nnMatrix output_vector = nnMatrixBorrowRows(&query->layer_outputs[l], i, 1); | ||
127 | nnMatrixMul(&input_vector, layer_weights, &output_vector); | ||
128 | nnMatrixAddRow(&output_vector, layer_biases, &output_vector); | ||
129 | |||
130 | switch (net->activations[l]) { | ||
131 | case nnIdentity: | ||
132 | break; // Nothing to do for the identity function. | ||
133 | case nnSigmoid: | ||
134 | sigmoid_array(output_vector.values, output_vector.values, output_vector.cols); | ||
135 | break; | ||
136 | case nnRelu: | ||
137 | relu_array(output_vector.values, output_vector.values, output_vector.cols); | ||
138 | break; | ||
139 | default: | ||
140 | assert(0); | ||
141 | } | ||
142 | |||
143 | input_vector = output_vector; // Borrow. | ||
144 | } | ||
145 | } | ||
146 | } | ||
147 | |||
148 | void nnQueryArray(const nnNeuralNetwork* net, nnQueryObject* query, const R* input, R* output) { | ||
149 | assert(net); | ||
150 | assert(query); | ||
151 | assert(input); | ||
152 | assert(output); | ||
153 | assert(net->num_layers > 0); | ||
154 | |||
155 | nnMatrix input_vector = nnMatrixMake(net->weights[0].cols, 1); | ||
156 | nnMatrixInit(&input_vector, input); | ||
157 | nnQuery(net, query, &input_vector); | ||
158 | nnMatrixRowToArray(query->network_outputs, 0, output); | ||
159 | } | ||
160 | |||
161 | nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) { | ||
162 | assert(net); | ||
163 | assert(num_inputs > 0); | ||
164 | assert(net->num_layers > 0); | ||
165 | |||
166 | nnQueryObject* query = calloc(1, sizeof(nnQueryObject)); | ||
167 | if (!query) { | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | query->num_layers = net->num_layers; | ||
172 | |||
173 | // Allocate the intermediate layer output matrices. | ||
174 | query->layer_outputs = calloc(net->num_layers, sizeof(nnMatrix)); | ||
175 | if (!query->layer_outputs) { | ||
176 | free(query); | ||
177 | return 0; | ||
178 | } | ||
179 | for (int l = 0; l < net->num_layers; ++l) { | ||
180 | const nnMatrix* layer_weights = &net->weights[l]; | ||
181 | const int layer_output_size = nnLayerOutputSize(layer_weights); | ||
182 | query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size); | ||
183 | } | ||
184 | query->network_outputs = &query->layer_outputs[net->num_layers - 1]; | ||
185 | |||
186 | return query; | ||
187 | } | ||
188 | |||
189 | void nnDeleteQueryObject(nnQueryObject** query) { | ||
190 | if ( (!query) || (!(*query)) ) { | ||
191 | return; | ||
192 | } | ||
193 | if ((*query)->layer_outputs != 0) { | ||
194 | for (int l = 0; l < (*query)->num_layers; ++l) { | ||
195 | nnMatrixDel(&(*query)->layer_outputs[l]); | ||
196 | } | ||
197 | } | ||
198 | free((*query)->layer_outputs); | ||
199 | free(*query); | ||
200 | *query = 0; | ||
201 | } | ||
202 | |||
203 | const nnMatrix* nnNetOutputs(const nnQueryObject* query) { | ||
204 | assert(query); | ||
205 | return query->network_outputs; | ||
206 | } | ||
207 | |||
208 | int nnNetInputSize(const nnNeuralNetwork* net) { | ||
209 | assert(net); | ||
210 | assert(net->num_layers > 0); | ||
211 | return net->weights[0].rows; | ||
212 | } | ||
213 | |||
214 | int nnNetOutputSize(const nnNeuralNetwork* net) { | ||
215 | assert(net); | ||
216 | assert(net->num_layers > 0); | ||
217 | return net->weights[net->num_layers - 1].cols; | ||
218 | } | ||
219 | |||
220 | int nnLayerInputSize(const nnMatrix* weights) { | ||
221 | assert(weights); | ||
222 | return weights->rows; | ||
223 | } | ||
224 | |||
225 | int nnLayerOutputSize(const nnMatrix* weights) { | ||
226 | assert(weights); | ||
227 | return weights->cols; | ||
228 | } | ||
diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h new file mode 100644 index 0000000..26107b5 --- /dev/null +++ b/src/lib/src/neuralnet_impl.h | |||
@@ -0,0 +1,36 @@ | |||
1 | #pragma once | ||
2 | |||
3 | #include <neuralnet/matrix.h> | ||
4 | |||
/// Neural network object.
///
/// We store the transposes of the weight matrices so that we can do forward
/// passes with a minimal amount of work. That is, if on paper we write:
///
///   [w11 w21]
///   [w12 w22]
///
/// then the weight matrix in memory is stored as the following array:
///
///   w11 w12 w21 w22
///
/// Each layer's weight matrix therefore has one row per layer input and one
/// column per layer output, and each bias matrix is a single row with one
/// column per layer output.
typedef struct nnNeuralNetwork {
  int num_layers;            // Number of non-input layers (hidden + output).
  nnMatrix* weights;         // One matrix per non-input layer.
  nnMatrix* biases;          // One vector per non-input layer.
  nnActivation* activations; // One per non-input layer.
} nnNeuralNetwork;
22 | |||
/// A query object that holds all the memory necessary to query a network.
///
/// |layer_outputs| is an array of layer output matrices, one matrix per
/// non-input layer of the network. Each matrix holds the layer's output,
/// with one row per input, and as many columns as the layer's output size (the
/// output vector is transposed.)
///
/// |network_outputs| points to the last output matrix in |layer_outputs| for
/// convenience.
///
/// NOTE(review): the typedef name below is nnTrainingQueryObject while the rest
/// of the code refers to this struct as nnQueryObject; presumably that name is
/// declared in a public header. Confirm whether this alias is intentional.
typedef struct nnQueryObject {
  int num_layers;            // Number of matrices in |layer_outputs|.
  nnMatrix* layer_outputs;   // Output matrices, one output per layer.
  nnMatrix* network_outputs; // Points to the last output matrix.
} nnTrainingQueryObject;
diff --git a/src/lib/src/train.c b/src/lib/src/train.c new file mode 100644 index 0000000..027de66 --- /dev/null +++ b/src/lib/src/train.c | |||
@@ -0,0 +1,346 @@ | |||
1 | #include <neuralnet/train.h> | ||
2 | |||
3 | #include <neuralnet/matrix.h> | ||
4 | #include "neuralnet_impl.h" | ||
5 | |||
6 | #include <random/mt19937-64.h> | ||
7 | #include <random/normal.h> | ||
8 | |||
9 | #include <assert.h> | ||
10 | #include <math.h> | ||
11 | #include <stdlib.h> | ||
12 | |||
13 | #include <stdio.h> | ||
14 | #define LOGD printf | ||
15 | |||
// If debug mode is requested, progress is shown each time this percentage of
// the total number of iterations has elapsed.
static const int PROGRESS_THRESHOLD = 5; // %
18 | |||
19 | /// Computes the total MSE from the output error matrix. | ||
20 | R ComputeMSE(const nnMatrix* errors) { | ||
21 | R sum_sq = 0; | ||
22 | const int N = errors->rows * errors->cols; | ||
23 | const R* value = errors->values; | ||
24 | for (int i = 0; i < N; ++i) { | ||
25 | sum_sq += *value * *value; | ||
26 | value++; | ||
27 | } | ||
28 | return sum_sq / (R)N; | ||
29 | } | ||
30 | |||
/// Holds the bits required to compute a sigmoid gradient.
///
/// The sigmoid gradient uses the term (1 - output); |ones| is the constant
/// left-hand side of that subtraction, allocated once before training.
typedef struct nnSigmoidGradientElements {
  nnMatrix ones; // A vector of just ones, same size as the layer.
} nnSigmoidGradientElements;
35 | |||
/// Holds the various elements required to compute gradients. These depend on
/// which activation functions are used, so they'll potentially be different for
/// each layer. A data type is defined for these because we allocate all the
/// required memory up front before entering the training loop.
typedef struct nnGradientElements {
  // Activation function of the layer; selects which members below are in use.
  nnActivation type;
  // Gradient vector, same size as the layer.
  // This will contain the gradient expression except for the output value of
  // the previous layer.
  nnMatrix gradient;
  // Activation-specific storage.
  union {
    nnSigmoidGradientElements sigmoid;
  };
} nnGradientElements;
50 | |||
51 | // Initialize the network's weights randomly and set their biases to 0. | ||
52 | void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy strategy) { | ||
53 | assert(net); | ||
54 | |||
55 | mt19937_64 rng = mt19937_64_make(); | ||
56 | mt19937_64_init(&rng, seed); | ||
57 | |||
58 | for (int l = 0; l < net->num_layers; ++l) { | ||
59 | nnMatrix* weights = &net->weights[l]; | ||
60 | nnMatrix* biases = &net->biases[l]; | ||
61 | |||
62 | const R layer_size = (R)nnLayerInputSize(weights); | ||
63 | const R scale = 1. / layer_size; | ||
64 | const R stdev = 1. / sqrt((R)layer_size); | ||
65 | const R sigma = stdev * stdev; | ||
66 | |||
67 | R* value = weights->values; | ||
68 | for (int k = 0; k < weights->rows * weights->cols; ++k) { | ||
69 | switch (strategy) { | ||
70 | case nnWeightInit01: { | ||
71 | const R x01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. | ||
72 | *value++ = scale * x01; | ||
73 | break; | ||
74 | } | ||
75 | case nnWeightInit11: { | ||
76 | const R x11 = mt19937_64_gen_real4(&rng); // (-1, +1) interval. | ||
77 | *value++ = scale * x11; | ||
78 | break; | ||
79 | } | ||
80 | case nnWeightInitNormal: | ||
81 | // Using initialization with a normal distribution of standard | ||
82 | // deviation 1 / sqrt(num_layer_weights) to prevent saturation when | ||
83 | // multiplying inputs. | ||
84 | const R u01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. | ||
85 | const R v01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. | ||
86 | R z0, z1; | ||
87 | normal2(u01, v01, &z0, &z1); | ||
88 | z0 = normal_transform(z0, /*mu=*/0, sigma); | ||
89 | z1 = normal_transform(z1, /*mu=*/0, sigma); | ||
90 | *value++ = z0; | ||
91 | if (k < weights->rows * weights->cols - 1) { | ||
92 | *value++ = z1; | ||
93 | ++k; | ||
94 | } | ||
95 | break; | ||
96 | default: | ||
97 | assert(false); | ||
98 | } | ||
99 | } | ||
100 | |||
101 | // Initialize biases. | ||
102 | // 0 is used so that functions originally go through the origin. | ||
103 | value = biases->values; | ||
104 | for (int k = 0; k < biases->rows * biases->cols; ++k, ++value) { | ||
105 | *value = 0; | ||
106 | } | ||
107 | } | ||
108 | } | ||
109 | |||
110 | // |inputs| has one row vector per sample. | ||
111 | // |targets| has one row vector per sample. | ||
112 | // | ||
113 | // For now, each iteration trains with one sample (row) at a time. | ||
114 | void nnTrain( | ||
115 | nnNeuralNetwork* net, | ||
116 | const nnMatrix* inputs, | ||
117 | const nnMatrix* targets, | ||
118 | const nnTrainingParams* params) { | ||
119 | assert(net); | ||
120 | assert(inputs); | ||
121 | assert(targets); | ||
122 | assert(params); | ||
123 | assert(nnNetOutputSize(net) == targets->cols); | ||
124 | assert(net->num_layers > 0); | ||
125 | |||
126 | // Allocate error vectors to hold the backpropagated error values. | ||
127 | // For now, these are one row vector per layer, meaning that we will train | ||
128 | // with one sample at a time. | ||
129 | nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix)); | ||
130 | |||
131 | // Allocate the weight transpose matrices up front for backpropagation. | ||
132 | nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix)); | ||
133 | |||
134 | // Allocate the weight delta matrices. | ||
135 | nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix)); | ||
136 | |||
137 | // Allocate the data structures required to compute gradients. | ||
138 | // This depends on each layer's activation type. | ||
139 | nnGradientElements* gradient_elems = calloc(net->num_layers, sizeof(nnGradientElements)); | ||
140 | |||
141 | // Allocate the output transpose vectors for weight delta calculation. | ||
142 | // This is one column vector per layer. | ||
143 | nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix)); | ||
144 | |||
145 | assert(errors != 0); | ||
146 | assert(weights_T != 0); | ||
147 | assert(weight_deltas != 0); | ||
148 | assert(gradient_elems); | ||
149 | assert(outputs_T); | ||
150 | |||
151 | for (int l = 0; l < net->num_layers; ++l) { | ||
152 | const nnMatrix* layer_weights = &net->weights[l]; | ||
153 | const int layer_output_size = net->weights[l].cols; | ||
154 | const nnActivation activation = net->activations[l]; | ||
155 | |||
156 | errors[l] = nnMatrixMake(1, layer_weights->cols); | ||
157 | |||
158 | weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows); | ||
159 | nnMatrixTranspose(layer_weights, &weights_T[l]); | ||
160 | |||
161 | weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols); | ||
162 | |||
163 | outputs_T[l] = nnMatrixMake(layer_output_size, 1); | ||
164 | |||
165 | // Allocate the gradient elements and vectors for weight delta calculation. | ||
166 | nnGradientElements* elems = &gradient_elems[l]; | ||
167 | elems->type = activation; | ||
168 | switch (activation) { | ||
169 | case nnIdentity: | ||
170 | break; // Gradient vector will be borrowed, no need to allocate. | ||
171 | |||
172 | case nnSigmoid: | ||
173 | elems->gradient = nnMatrixMake(1, layer_output_size); | ||
174 | // Allocate the 1s vectors. | ||
175 | elems->sigmoid.ones = nnMatrixMake(1, layer_output_size); | ||
176 | nnMatrixInitConstant(&elems->sigmoid.ones, 1); | ||
177 | break; | ||
178 | |||
179 | case nnRelu: | ||
180 | elems->gradient = nnMatrixMake(1, layer_output_size); | ||
181 | break; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | // Construct the query object with a size of 1 since we are training with one | ||
186 | // sample at a time. | ||
187 | nnQueryObject* query = nnMakeQueryObject(net, 1); | ||
188 | |||
189 | // Network outputs are given by the query object. Every network query updates | ||
190 | // the outputs. | ||
191 | const nnMatrix* const training_outputs = query->network_outputs; | ||
192 | |||
193 | // A vector to store the training input transposed. | ||
194 | nnMatrix training_inputs_T = nnMatrixMake(inputs->cols, 1); | ||
195 | |||
196 | // If debug mode is requested, we will show progress every Nth iteration. | ||
197 | const int progress_frame = | ||
198 | (params->max_iterations < PROGRESS_THRESHOLD) | ||
199 | ? 1 | ||
200 | : (params->max_iterations * PROGRESS_THRESHOLD / 100); | ||
201 | |||
202 | // --- TRAIN | ||
203 | |||
204 | nnInitNet(net, params->seed, params->weight_init); | ||
205 | |||
206 | for (int iter = 0; iter < params->max_iterations; ++iter) { | ||
207 | |||
208 | // For now, we train with one sample at a time. | ||
209 | for (int sample = 0; sample < inputs->rows; ++sample) { | ||
210 | // Slice the input and target matrices with the batch size. | ||
211 | // We are not mutating the inputs, but we need the cast to borrow. | ||
212 | nnMatrix training_inputs = nnMatrixBorrowRows((nnMatrix*)inputs, sample, 1); | ||
213 | nnMatrix training_targets = nnMatrixBorrowRows((nnMatrix*)targets, sample, 1); | ||
214 | |||
215 | // Will need the input transposed for backpropagation. | ||
216 | // Assuming one training input per iteration for now. | ||
217 | nnMatrixTranspose(&training_inputs, &training_inputs_T); | ||
218 | |||
219 | // Run a forward pass and compute the output layer error. | ||
220 | // We don't square the error here; instead, we just compute t-o, which is | ||
221 | // part of the derivative, -2(t-o). Also, we compute o-t instead to | ||
222 | // remove that outer negative sign. | ||
223 | nnQuery(net, query, &training_inputs); | ||
224 | //nnMatrixSub(&training_targets, training_outputs, &errors[net->num_layers - 1]); | ||
225 | nnMatrixSub(training_outputs, &training_targets, &errors[net->num_layers - 1]); | ||
226 | |||
227 | // Update outputs_T, which we need during weight updates. | ||
228 | for (int l = 0; l < net->num_layers; ++l) { | ||
229 | nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]); | ||
230 | } | ||
231 | |||
232 | // Update weights and biases for each internal layer, backpropagating | ||
233 | // errors along the way. | ||
234 | for (int l = net->num_layers - 1; l >= 0; --l) { | ||
235 | const nnMatrix* layer_output = &query->layer_outputs[l]; | ||
236 | nnMatrix* layer_weights = &net->weights[l]; | ||
237 | nnMatrix* layer_biases = &net->biases[l]; | ||
238 | nnGradientElements* elems = &gradient_elems[l]; | ||
239 | nnMatrix* gradient = &elems->gradient; | ||
240 | const nnActivation activation = net->activations[l]; | ||
241 | |||
242 | // Compute the gradient (the part of the expression that does not | ||
243 | // contain the output of the previous layer). | ||
244 | // | ||
245 | // Identity: G = error_k | ||
246 | // Sigmoid: G = error_k * output_k * (1 - output_k). | ||
247 | // Relu: G = error_k * (output_k > 0 ? 1 : 0) | ||
248 | switch (activation) { | ||
249 | case nnIdentity: | ||
250 | // TODO: Just copy the pointer? | ||
251 | *gradient = nnMatrixBorrow(&errors[l]); | ||
252 | break; | ||
253 | case nnSigmoid: | ||
254 | nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); | ||
255 | nnMatrixMulPairs(layer_output, gradient, gradient); | ||
256 | nnMatrixMulPairs(&errors[l], gradient, gradient); | ||
257 | break; | ||
258 | case nnRelu: | ||
259 | nnMatrixGt(layer_output, 0, gradient); | ||
260 | nnMatrixMulPairs(&errors[l], gradient, gradient); | ||
261 | break; | ||
262 | } | ||
263 | |||
264 | // Outer product to compute the weight deltas. | ||
265 | const nnMatrix* output_T = (l == 0) ? &training_inputs_T : &outputs_T[l-1]; | ||
266 | nnMatrixMul(output_T, gradient, &weight_deltas[l]); | ||
267 | |||
268 | // Backpropagate the error before updating weights. | ||
269 | if (l > 0) { | ||
270 | nnMatrixMul(gradient, &weights_T[l], &errors[l-1]); | ||
271 | } | ||
272 | |||
273 | // Update weights. | ||
274 | nnMatrixScale(&weight_deltas[l], params->learning_rate); | ||
275 | // The gradient has a negative sign from -(t - o), but we have computed | ||
276 | // e = o - t instead, so we can subtract directly. | ||
277 | //nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights); | ||
278 | nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); | ||
279 | |||
280 | // Update weight transpose matrix for the next training iteration. | ||
281 | nnMatrixTranspose(layer_weights, &weights_T[l]); | ||
282 | |||
283 | // Update biases. | ||
284 | // This is the same formula as for weights, except that the o_j term is | ||
285 | // just 1. We can simply re-use the gradient that we have already | ||
286 | // computed for the weight update. | ||
287 | //nnMatrixMulAdd(layer_biases, gradient, params->learning_rate, layer_biases); | ||
288 | nnMatrixMulSub(layer_biases, gradient, params->learning_rate, layer_biases); | ||
289 | } | ||
290 | |||
291 | // TODO: Add this under a verbose debugging mode. | ||
292 | // if (params->debug) { | ||
293 | // LOGD("Iter: %d, Sample: %d, Error: %f\n", iter, sample, ComputeMSE(&errors[net->num_layers - 1])); | ||
294 | // LOGD("TGT: "); | ||
295 | // for (int i = 0; i < training_targets.cols; ++i) { | ||
296 | // printf("%.3f ", training_targets.values[i]); | ||
297 | // } | ||
298 | // printf("\n"); | ||
299 | // LOGD("OUT: "); | ||
300 | // for (int i = 0; i < training_outputs->cols; ++i) { | ||
301 | // printf("%.3f ", training_outputs->values[i]); | ||
302 | // } | ||
303 | // printf("\n"); | ||
304 | // } | ||
305 | } | ||
306 | |||
307 | if (params->debug && ((iter % progress_frame) == 0)) { | ||
308 | LOGD("Iter: %d/%d, Error: %f\n", | ||
309 | iter, params->max_iterations, ComputeMSE(&errors[net->num_layers - 1])); | ||
310 | } | ||
311 | } | ||
312 | |||
313 | // Print the final error. | ||
314 | if (params->debug) { | ||
315 | LOGD("Iter: %d/%d, Error: %f\n", | ||
316 | params->max_iterations, params->max_iterations, ComputeMSE(&errors[net->num_layers - 1])); | ||
317 | } | ||
318 | |||
319 | for (int l = 0; l < net->num_layers; ++l) { | ||
320 | nnMatrixDel(&errors[l]); | ||
321 | nnMatrixDel(&outputs_T[l]); | ||
322 | nnMatrixDel(&weights_T[l]); | ||
323 | nnMatrixDel(&weight_deltas[l]); | ||
324 | |||
325 | nnGradientElements* elems = &gradient_elems[l]; | ||
326 | switch (elems->type) { | ||
327 | case nnIdentity: | ||
328 | break; // Gradient vector is borrowed, no need to deallocate. | ||
329 | |||
330 | case nnSigmoid: | ||
331 | nnMatrixDel(&elems->gradient); | ||
332 | nnMatrixDel(&elems->sigmoid.ones); | ||
333 | break; | ||
334 | |||
335 | case nnRelu: | ||
336 | nnMatrixDel(&elems->gradient); | ||
337 | break; | ||
338 | } | ||
339 | } | ||
340 | nnMatrixDel(&training_inputs_T); | ||
341 | free(errors); | ||
342 | free(outputs_T); | ||
343 | free(weights_T); | ||
344 | free(weight_deltas); | ||
345 | free(gradient_elems); | ||
346 | } | ||
diff --git a/src/lib/test/matrix_test.c b/src/lib/test/matrix_test.c new file mode 100644 index 0000000..8191c97 --- /dev/null +++ b/src/lib/test/matrix_test.c | |||
@@ -0,0 +1,350 @@ | |||
1 | #include <neuralnet/matrix.h> | ||
2 | |||
3 | #include "test.h" | ||
4 | #include "test_util.h" | ||
5 | |||
6 | #include <assert.h> | ||
7 | #include <stdlib.h> | ||
8 | |||
9 | // static void PrintMatrix(const nnMatrix* matrix) { | ||
10 | // assert(matrix); | ||
11 | |||
12 | // for (int i = 0; i < matrix->rows; ++i) { | ||
13 | // for (int j = 0; j < matrix->cols; ++j) { | ||
14 | // printf("%f ", nnMatrixAt(matrix, i, j)); | ||
15 | // } | ||
16 | // printf("\n"); | ||
17 | // } | ||
18 | // } | ||
19 | |||
20 | TEST_CASE(nnMatrixMake_1x1) { | ||
21 | nnMatrix A = nnMatrixMake(1, 1); | ||
22 | TEST_EQUAL(A.rows, 1); | ||
23 | TEST_EQUAL(A.cols, 1); | ||
24 | } | ||
25 | |||
26 | TEST_CASE(nnMatrixMake_3x1) { | ||
27 | nnMatrix A = nnMatrixMake(3, 1); | ||
28 | TEST_EQUAL(A.rows, 3); | ||
29 | TEST_EQUAL(A.cols, 1); | ||
30 | } | ||
31 | |||
32 | TEST_CASE(nnMatrixInit_3x1) { | ||
33 | nnMatrix A = nnMatrixMake(3, 1); | ||
34 | nnMatrixInit(&A, (R[]) { 1, 2, 3 }); | ||
35 | TEST_EQUAL(A.values[0], 1); | ||
36 | TEST_EQUAL(A.values[1], 2); | ||
37 | TEST_EQUAL(A.values[2], 3); | ||
38 | } | ||
39 | |||
40 | TEST_CASE(nnMatrixCopyCol_test) { | ||
41 | nnMatrix A = nnMatrixMake(3, 2); | ||
42 | nnMatrix B = nnMatrixMake(3, 1); | ||
43 | |||
44 | nnMatrixInit(&A, (R[]) { | ||
45 | 1, 2, | ||
46 | 3, 4, | ||
47 | 5, 6, | ||
48 | }); | ||
49 | |||
50 | nnMatrixCopyCol(&A, &B, 1, 0); | ||
51 | |||
52 | TEST_EQUAL(nnMatrixAt(&B, 0, 0), 2); | ||
53 | TEST_EQUAL(nnMatrixAt(&B, 1, 0), 4); | ||
54 | TEST_EQUAL(nnMatrixAt(&B, 2, 0), 6); | ||
55 | |||
56 | nnMatrixDel(&A); | ||
57 | nnMatrixDel(&B); | ||
58 | } | ||
59 | |||
60 | TEST_CASE(nnMatrixMul_square_3x3) { | ||
61 | nnMatrix A = nnMatrixMake(3, 3); | ||
62 | nnMatrix B = nnMatrixMake(3, 3); | ||
63 | nnMatrix O = nnMatrixMake(3, 3); | ||
64 | |||
65 | nnMatrixInit(&A, (const R[]){ | ||
66 | 1, 2, 3, | ||
67 | 4, 5, 6, | ||
68 | 7, 8, 9, | ||
69 | }); | ||
70 | nnMatrixInit(&B, (const R[]){ | ||
71 | 2, 4, 3, | ||
72 | 6, 8, 5, | ||
73 | 1, 7, 9, | ||
74 | }); | ||
75 | nnMatrixMul(&A, &B, &O); | ||
76 | |||
77 | const R expected[3][3] = { | ||
78 | { 17, 41, 40 }, | ||
79 | { 44, 98, 91 }, | ||
80 | { 71, 155, 142 }, | ||
81 | }; | ||
82 | for (int i = 0; i < O.rows; ++i) { | ||
83 | for (int j = 0; j < O.cols; ++j) { | ||
84 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
85 | } | ||
86 | } | ||
87 | |||
88 | nnMatrixDel(&A); | ||
89 | nnMatrixDel(&B); | ||
90 | nnMatrixDel(&O); | ||
91 | } | ||
92 | |||
93 | TEST_CASE(nnMatrixMul_non_square_2x3_3x1) { | ||
94 | nnMatrix A = nnMatrixMake(2, 3); | ||
95 | nnMatrix B = nnMatrixMake(3, 1); | ||
96 | nnMatrix O = nnMatrixMake(2, 1); | ||
97 | |||
98 | nnMatrixInit(&A, (const R[]){ | ||
99 | 1, 2, 3, | ||
100 | 4, 5, 6, | ||
101 | }); | ||
102 | nnMatrixInit(&B, (const R[]){ | ||
103 | 2, | ||
104 | 6, | ||
105 | 1, | ||
106 | }); | ||
107 | nnMatrixMul(&A, &B, &O); | ||
108 | |||
109 | const R expected[2][1] = { | ||
110 | { 17 }, | ||
111 | { 44 }, | ||
112 | }; | ||
113 | for (int i = 0; i < O.rows; ++i) { | ||
114 | for (int j = 0; j < O.cols; ++j) { | ||
115 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
116 | } | ||
117 | } | ||
118 | |||
119 | nnMatrixDel(&A); | ||
120 | nnMatrixDel(&B); | ||
121 | nnMatrixDel(&O); | ||
122 | } | ||
123 | |||
124 | TEST_CASE(nnMatrixMulAdd_test) { | ||
125 | nnMatrix A = nnMatrixMake(2, 3); | ||
126 | nnMatrix B = nnMatrixMake(2, 3); | ||
127 | nnMatrix O = nnMatrixMake(2, 3); | ||
128 | const R scale = 2; | ||
129 | |||
130 | nnMatrixInit(&A, (const R[]){ | ||
131 | 1, 2, 3, | ||
132 | 4, 5, 6, | ||
133 | }); | ||
134 | nnMatrixInit(&B, (const R[]){ | ||
135 | 2, 3, 1, | ||
136 | 7, 4, 3 | ||
137 | }); | ||
138 | nnMatrixMulAdd(&A, &B, scale, &O); // O = A + B * scale | ||
139 | |||
140 | const R expected[2][3] = { | ||
141 | { 5, 8, 5 }, | ||
142 | { 18, 13, 12 }, | ||
143 | }; | ||
144 | for (int i = 0; i < O.rows; ++i) { | ||
145 | for (int j = 0; j < O.cols; ++j) { | ||
146 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
147 | } | ||
148 | } | ||
149 | |||
150 | nnMatrixDel(&A); | ||
151 | nnMatrixDel(&B); | ||
152 | nnMatrixDel(&O); | ||
153 | } | ||
154 | |||
155 | TEST_CASE(nnMatrixMulSub_test) { | ||
156 | nnMatrix A = nnMatrixMake(2, 3); | ||
157 | nnMatrix B = nnMatrixMake(2, 3); | ||
158 | nnMatrix O = nnMatrixMake(2, 3); | ||
159 | const R scale = 2; | ||
160 | |||
161 | nnMatrixInit(&A, (const R[]){ | ||
162 | 1, 2, 3, | ||
163 | 4, 5, 6, | ||
164 | }); | ||
165 | nnMatrixInit(&B, (const R[]){ | ||
166 | 2, 3, 1, | ||
167 | 7, 4, 3 | ||
168 | }); | ||
169 | nnMatrixMulSub(&A, &B, scale, &O); // O = A - B * scale | ||
170 | |||
171 | const R expected[2][3] = { | ||
172 | { -3, -4, 1 }, | ||
173 | { -10, -3, 0 }, | ||
174 | }; | ||
175 | for (int i = 0; i < O.rows; ++i) { | ||
176 | for (int j = 0; j < O.cols; ++j) { | ||
177 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
178 | } | ||
179 | } | ||
180 | |||
181 | nnMatrixDel(&A); | ||
182 | nnMatrixDel(&B); | ||
183 | nnMatrixDel(&O); | ||
184 | } | ||
185 | |||
186 | TEST_CASE(nnMatrixMulPairs_2x3) { | ||
187 | nnMatrix A = nnMatrixMake(2, 3); | ||
188 | nnMatrix B = nnMatrixMake(2, 3); | ||
189 | nnMatrix O = nnMatrixMake(2, 3); | ||
190 | |||
191 | nnMatrixInit(&A, (const R[]){ | ||
192 | 1, 2, 3, | ||
193 | 4, 5, 6, | ||
194 | }); | ||
195 | nnMatrixInit(&B, (const R[]){ | ||
196 | 2, 3, 1, | ||
197 | 7, 4, 3 | ||
198 | }); | ||
199 | nnMatrixMulPairs(&A, &B, &O); | ||
200 | |||
201 | const R expected[2][3] = { | ||
202 | { 2, 6, 3 }, | ||
203 | { 28, 20, 18 }, | ||
204 | }; | ||
205 | for (int i = 0; i < O.rows; ++i) { | ||
206 | for (int j = 0; j < O.cols; ++j) { | ||
207 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
208 | } | ||
209 | } | ||
210 | |||
211 | nnMatrixDel(&A); | ||
212 | nnMatrixDel(&B); | ||
213 | nnMatrixDel(&O); | ||
214 | } | ||
215 | |||
216 | TEST_CASE(nnMatrixAdd_square_2x2) { | ||
217 | nnMatrix A = nnMatrixMake(2, 2); | ||
218 | nnMatrix B = nnMatrixMake(2, 2); | ||
219 | nnMatrix C = nnMatrixMake(2, 2); | ||
220 | |||
221 | nnMatrixInit(&A, (R[]) { | ||
222 | 1, 2, | ||
223 | 3, 4, | ||
224 | }); | ||
225 | nnMatrixInit(&B, (R[]) { | ||
226 | 2, 1, | ||
227 | 5, 3, | ||
228 | }); | ||
229 | |||
230 | nnMatrixAdd(&A, &B, &C); | ||
231 | |||
232 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 0), 3, EPS)); | ||
233 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 1), 3, EPS)); | ||
234 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 0), 8, EPS)); | ||
235 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 1), 7, EPS)); | ||
236 | |||
237 | nnMatrixDel(&A); | ||
238 | nnMatrixDel(&B); | ||
239 | nnMatrixDel(&C); | ||
240 | } | ||
241 | |||
242 | TEST_CASE(nnMatrixSub_square_2x2) { | ||
243 | nnMatrix A = nnMatrixMake(2, 2); | ||
244 | nnMatrix B = nnMatrixMake(2, 2); | ||
245 | nnMatrix C = nnMatrixMake(2, 2); | ||
246 | |||
247 | nnMatrixInit(&A, (R[]) { | ||
248 | 1, 2, | ||
249 | 3, 4, | ||
250 | }); | ||
251 | nnMatrixInit(&B, (R[]) { | ||
252 | 2, 1, | ||
253 | 5, 3, | ||
254 | }); | ||
255 | |||
256 | nnMatrixSub(&A, &B, &C); | ||
257 | |||
258 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 0), -1, EPS)); | ||
259 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 1), +1, EPS)); | ||
260 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 0), -2, EPS)); | ||
261 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 1), +1, EPS)); | ||
262 | |||
263 | nnMatrixDel(&A); | ||
264 | nnMatrixDel(&B); | ||
265 | nnMatrixDel(&C); | ||
266 | } | ||
267 | |||
268 | TEST_CASE(nnMatrixAddRow_test) { | ||
269 | nnMatrix A = nnMatrixMake(2, 3); | ||
270 | nnMatrix B = nnMatrixMake(1, 3); | ||
271 | nnMatrix C = nnMatrixMake(2, 3); | ||
272 | |||
273 | nnMatrixInit(&A, (R[]) { | ||
274 | 1, 2, 3, | ||
275 | 4, 5, 6, | ||
276 | }); | ||
277 | nnMatrixInit(&B, (R[]) { | ||
278 | 2, 1, 3, | ||
279 | }); | ||
280 | |||
281 | nnMatrixAddRow(&A, &B, &C); | ||
282 | |||
283 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 0), 3, EPS)); | ||
284 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 1), 3, EPS)); | ||
285 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 2), 6, EPS)); | ||
286 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 0), 6, EPS)); | ||
287 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 1), 6, EPS)); | ||
288 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 2), 9, EPS)); | ||
289 | |||
290 | nnMatrixDel(&A); | ||
291 | nnMatrixDel(&B); | ||
292 | nnMatrixDel(&C); | ||
293 | } | ||
294 | |||
295 | TEST_CASE(nnMatrixTranspose_square_2x2) { | ||
296 | nnMatrix A = nnMatrixMake(2, 2); | ||
297 | nnMatrix B = nnMatrixMake(2, 2); | ||
298 | |||
299 | nnMatrixInit(&A, (R[]) { | ||
300 | 1, 2, | ||
301 | 3, 4 | ||
302 | }); | ||
303 | |||
304 | nnMatrixTranspose(&A, &B); | ||
305 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 0), 1, EPS)); | ||
306 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 1), 3, EPS)); | ||
307 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 0), 2, EPS)); | ||
308 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 1), 4, EPS)); | ||
309 | |||
310 | nnMatrixDel(&A); | ||
311 | nnMatrixDel(&B); | ||
312 | } | ||
313 | |||
314 | TEST_CASE(nnMatrixTranspose_non_square_2x1) { | ||
315 | nnMatrix A = nnMatrixMake(2, 1); | ||
316 | nnMatrix B = nnMatrixMake(1, 2); | ||
317 | |||
318 | nnMatrixInit(&A, (R[]) { | ||
319 | 1, | ||
320 | 3, | ||
321 | }); | ||
322 | |||
323 | nnMatrixTranspose(&A, &B); | ||
324 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 0), 1, EPS)); | ||
325 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 1), 3, EPS)); | ||
326 | |||
327 | nnMatrixDel(&A); | ||
328 | nnMatrixDel(&B); | ||
329 | } | ||
330 | |||
331 | TEST_CASE(nnMatrixGt_test) { | ||
332 | nnMatrix A = nnMatrixMake(2, 3); | ||
333 | nnMatrix B = nnMatrixMake(2, 3); | ||
334 | |||
335 | nnMatrixInit(&A, (R[]) { | ||
336 | -3, 2, 0, | ||
337 | 4, -1, 5 | ||
338 | }); | ||
339 | |||
340 | nnMatrixGt(&A, 0, &B); | ||
341 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 0), 0, EPS)); | ||
342 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 1), 1, EPS)); | ||
343 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 2), 0, EPS)); | ||
344 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 0), 1, EPS)); | ||
345 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 1), 0, EPS)); | ||
346 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 2), 1, EPS)); | ||
347 | |||
348 | nnMatrixDel(&A); | ||
349 | nnMatrixDel(&B); | ||
350 | } | ||
diff --git a/src/lib/test/neuralnet_test.c b/src/lib/test/neuralnet_test.c new file mode 100644 index 0000000..14d9438 --- /dev/null +++ b/src/lib/test/neuralnet_test.c | |||
@@ -0,0 +1,92 @@ | |||
1 | #include <neuralnet/neuralnet.h> | ||
2 | |||
3 | #include <neuralnet/matrix.h> | ||
4 | #include "activation.h" | ||
5 | #include "neuralnet_impl.h" | ||
6 | |||
7 | #include "test.h" | ||
8 | #include "test_util.h" | ||
9 | |||
10 | #include <assert.h> | ||
11 | |||
12 | TEST_CASE(neuralnet_perceptron_test) { | ||
13 | const int num_layers = 1; | ||
14 | const int layer_sizes[] = { 1, 1 }; | ||
15 | const nnActivation layer_activations[] = { nnSigmoid }; | ||
16 | const R weights[] = { 0.3 }; | ||
17 | |||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
19 | assert(net); | ||
20 | nnSetWeights(net, weights); | ||
21 | |||
22 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
23 | |||
24 | const R input[] = { 0.9 }; | ||
25 | R output[1]; | ||
26 | nnQueryArray(net, query, input, output); | ||
27 | |||
28 | const R expected_output = sigmoid(input[0] * weights[0]); | ||
29 | printf("\nOutput: %f, Expected: %f\n", output[0], expected_output); | ||
30 | TEST_TRUE(double_eq(output[0], expected_output, EPS)); | ||
31 | |||
32 | nnDeleteQueryObject(&query); | ||
33 | nnDeleteNet(&net); | ||
34 | } | ||
35 | |||
36 | TEST_CASE(neuralnet_xor_test) { | ||
37 | const int num_layers = 2; | ||
38 | const int layer_sizes[] = { 2, 2, 1 }; | ||
39 | const nnActivation layer_activations[] = { nnRelu, nnIdentity }; | ||
40 | const R weights[] = { | ||
41 | 1, 1, 1, 1, // First (hidden) layer. | ||
42 | 1, -2 // Second (output) layer. | ||
43 | }; | ||
44 | const R biases[] = { | ||
45 | 0, -1, // First (hidden) layer. | ||
46 | 0 // Second (output) layer. | ||
47 | }; | ||
48 | |||
49 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
50 | assert(net); | ||
51 | nnSetWeights(net, weights); | ||
52 | nnSetBiases(net, biases); | ||
53 | |||
54 | // First layer weights. | ||
55 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 0), 1); | ||
56 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 1), 1); | ||
57 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 2), 1); | ||
58 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 3), 1); | ||
59 | // Second layer weights. | ||
60 | TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 0), 1); | ||
61 | TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 1), -2); | ||
62 | // First layer biases. | ||
63 | TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 0), 0); | ||
64 | TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 1), -1); | ||
65 | // Second layer biases. | ||
66 | TEST_EQUAL(nnMatrixAt(&net->biases[1], 0, 0), 0); | ||
67 | |||
68 | // Test. | ||
69 | |||
70 | #define M 4 | ||
71 | |||
72 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); | ||
73 | |||
74 | const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; | ||
75 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); | ||
76 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); | ||
77 | nnQuery(net, query, &test_inputs_matrix); | ||
78 | |||
79 | const R expected_outputs[M] = { 0., 1., 1., 0. }; | ||
80 | for (int i = 0; i < M; ++i) { | ||
81 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | ||
82 | printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", | ||
83 | test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); | ||
84 | } | ||
85 | for (int i = 0; i < M; ++i) { | ||
86 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | ||
87 | TEST_TRUE(double_eq(test_output, expected_outputs[i], OUTPUT_EPS)); | ||
88 | } | ||
89 | |||
90 | nnDeleteQueryObject(&query); | ||
91 | nnDeleteNet(&net); | ||
92 | } | ||
diff --git a/src/lib/test/test.h b/src/lib/test/test.h new file mode 100644 index 0000000..fd8dc22 --- /dev/null +++ b/src/lib/test/test.h | |||
@@ -0,0 +1,185 @@ | |||
1 | // SPDX-License-Identifier: MIT | ||
2 | #pragma once | ||
3 | |||
4 | #ifdef UNIT_TEST | ||
5 | |||
6 | #include <stdbool.h> | ||
7 | #include <stdio.h> | ||
8 | #include <stdlib.h> | ||
9 | #include <string.h> | ||
10 | |||
11 | #if defined(__DragonFly__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ | ||
12 | defined(__NetBSD__) || defined(__OpenBSD__) | ||
13 | #define USE_SYSCTL_FOR_ARGS 1 | ||
14 | // clang-format off | ||
15 | #include <sys/types.h> | ||
16 | #include <sys/sysctl.h> | ||
17 | // clang-format on | ||
18 | #include <unistd.h> // getpid | ||
19 | #endif | ||
20 | |||
21 | struct test_file_metadata; | ||
22 | |||
/// Outcome record of one test case. SET_FAILURE fills every field; run_tests()
/// clears `present` before invoking the test body.
struct test_failure {
	/// True once a TEST_* check in the test body has failed.
	bool present;
	/// Text describing the failed check.
	const char *message;
	/// Source file (__FILE__) of the failed check.
	const char *file;
	/// Source line (__LINE__) of the failed check.
	int line;
};
29 | |||
30 | struct test_case_metadata { | ||
31 | void (*fn)(struct test_case_metadata *, struct test_file_metadata *); | ||
32 | struct test_failure failure; | ||
33 | const char *name; | ||
34 | struct test_case_metadata *next; | ||
35 | }; | ||
36 | |||
/// One translation unit that contains test cases; chained into the global
/// list headed by test_file_head.
struct test_file_metadata {
	/// True once this record has been linked into the global file list.
	bool registered;
	/// File name (__FILE__) recorded at registration time.
	const char *name;
	/// Next registered file, or NULL.
	struct test_file_metadata *next;
	/// Head of the list of test cases registered from this file.
	struct test_case_metadata *tests;
};
43 | |||
44 | struct test_file_metadata __attribute__((weak)) * test_file_head; | ||
45 | |||
// Records a failure on the `metadata` parameter of the enclosing test body,
// capturing the message together with the current file and line.
#define SET_FAILURE(_message) \
	metadata->failure = (struct test_failure) { \
		.present = true, \
		.message = _message, \
		.file = __FILE__, \
		.line = __LINE__, \
	}
50 | |||
// Fails the enclosing test case, and returns from it, unless `a` compares
// equal to `b`.
#define TEST_EQUAL(a, b) \
	do { \
		if (!((a) == (b))) { \
			SET_FAILURE(#a " != " #b); \
			return; \
		} \
	} while (0)
58 | |||
// Fails the enclosing test case, and returns from it, unless `a` is true.
#define TEST_TRUE(a) \
	do { \
		if ((a)) { \
			break; \
		} \
		SET_FAILURE(#a " is not true"); \
		return; \
	} while (0)
66 | |||
// Fails the enclosing test case, and returns from it, unless the C strings
// `a` and `b` compare equal under strcmp().
#define TEST_STREQUAL(a, b) \
	do { \
		if (strcmp(a, b)) { \
			SET_FAILURE(#a " != " #b); \
			return; \
		} \
	} while (0)
74 | |||
// Defines a test case named `_name`. The macro expands to:
//   - a forward declaration of the test body,
//   - the per-file record and a per-test record,
//   - a constructor (priority 101) that links the file record into the
//     global list once and pushes the test record onto the file's list,
//   - the header of the test body, so the macro must be followed by `{ ... }`.
// Inside the body, `metadata` and `file_metadata` name the two records.
#define TEST_CASE(_name) \
	static void __test_h_##_name(struct test_case_metadata *, \
	                             struct test_file_metadata *); \
	static struct test_file_metadata __test_h_file; \
	static struct test_case_metadata __test_h_meta_##_name = { \
		.fn = __test_h_##_name, \
		.name = #_name, \
	}; \
	static void __attribute__((constructor(101))) __test_h_##_name##_register(void) { \
		if (!__test_h_file.registered) { \
			__test_h_file.name = __FILE__; \
			__test_h_file.next = test_file_head; \
			test_file_head = &__test_h_file; \
			__test_h_file.registered = true; \
		} \
		__test_h_meta_##_name.next = __test_h_file.tests; \
		__test_h_file.tests = &__test_h_meta_##_name; \
	} \
	static void __test_h_##_name( \
		struct test_case_metadata *metadata __attribute__((unused)), \
		struct test_file_metadata *file_metadata __attribute__((unused)))
96 | |||
97 | extern void __attribute__((weak)) (*test_h_unittest_setup)(void); | ||
98 | /// Run defined tests, return true if all tests succeeds | ||
99 | /// @param[out] tests_run if not NULL, set to whether tests were run | ||
100 | static inline void __attribute__((constructor(102))) run_tests(void) { | ||
101 | bool should_run = false; | ||
102 | #ifdef USE_SYSCTL_FOR_ARGS | ||
103 | int mib[] = { | ||
104 | CTL_KERN, | ||
105 | #if defined(__NetBSD__) || defined(__OpenBSD__) | ||
106 | KERN_PROC_ARGS, | ||
107 | getpid(), | ||
108 | KERN_PROC_ARGV, | ||
109 | #else | ||
110 | KERN_PROC, | ||
111 | KERN_PROC_ARGS, | ||
112 | getpid(), | ||
113 | #endif | ||
114 | }; | ||
115 | char *arg = NULL; | ||
116 | size_t arglen; | ||
117 | sysctl(mib, sizeof(mib) / sizeof(mib[0]), NULL, &arglen, NULL, 0); | ||
118 | arg = malloc(arglen); | ||
119 | sysctl(mib, sizeof(mib) / sizeof(mib[0]), arg, &arglen, NULL, 0); | ||
120 | #else | ||
121 | FILE *cmdlinef = fopen("/proc/self/cmdline", "r"); | ||
122 | char *arg = NULL; | ||
123 | int arglen; | ||
124 | fscanf(cmdlinef, "%ms%n", &arg, &arglen); | ||
125 | fclose(cmdlinef); | ||
126 | #endif | ||
127 | for (char *pos = arg; pos < arg + arglen; pos += strlen(pos) + 1) { | ||
128 | if (strcmp(pos, "--unittest") == 0) { | ||
129 | should_run = true; | ||
130 | break; | ||
131 | } | ||
132 | } | ||
133 | free(arg); | ||
134 | |||
135 | if (!should_run) { | ||
136 | return; | ||
137 | } | ||
138 | |||
139 | if (&test_h_unittest_setup) { | ||
140 | test_h_unittest_setup(); | ||
141 | } | ||
142 | |||
143 | struct test_file_metadata *i = test_file_head; | ||
144 | int failed = 0, success = 0; | ||
145 | while (i) { | ||
146 | fprintf(stderr, "Running tests from %s:\n", i->name); | ||
147 | struct test_case_metadata *j = i->tests; | ||
148 | while (j) { | ||
149 | fprintf(stderr, "\t%s ... ", j->name); | ||
150 | j->failure.present = false; | ||
151 | j->fn(j, i); | ||
152 | if (j->failure.present) { | ||
153 | fprintf(stderr, "failed (%s at %s:%d)\n", j->failure.message, | ||
154 | j->failure.file, j->failure.line); | ||
155 | failed++; | ||
156 | } else { | ||
157 | fprintf(stderr, "passed\n"); | ||
158 | success++; | ||
159 | } | ||
160 | j = j->next; | ||
161 | } | ||
162 | fprintf(stderr, "\n"); | ||
163 | i = i->next; | ||
164 | } | ||
165 | int total = failed + success; | ||
166 | fprintf(stderr, "Test results: passed %d/%d, failed %d/%d\n", success, total, | ||
167 | failed, total); | ||
168 | exit(failed == 0 ? EXIT_SUCCESS : EXIT_FAILURE); | ||
169 | } | ||
170 | |||
171 | #else | ||
172 | |||
173 | #include <stdbool.h> | ||
174 | |||
// No-op variants used when UNIT_TEST is not defined. The test body is still
// compiled (as an unused static function) and every macro argument is still
// evaluated, but nothing is checked or recorded.
#define TEST_CASE(name) static void __attribute__((unused)) __test_h_##name(void)

// The two-argument macros are wrapped in do { } while (0) so that they expand
// to a single statement and stay correct under an unbraced `if`.
#define TEST_EQUAL(a, b) \
	do { \
		(void)(a); \
		(void)(b); \
	} while (0)
#define TEST_TRUE(a) (void)(a)
#define TEST_STREQUAL(a, b) \
	do { \
		(void)(a); \
		(void)(b); \
	} while (0)
184 | |||
185 | #endif | ||
diff --git a/src/lib/test/test_main.c b/src/lib/test/test_main.c new file mode 100644 index 0000000..4cce7f6 --- /dev/null +++ b/src/lib/test/test_main.c | |||
@@ -0,0 +1,3 @@ | |||
// Entry point of the test binary; it does no work of its own.
// NOTE(review): presumably the tests are driven by the run_tests()
// constructor in test.h, which exits before main() is reached when
// --unittest is passed -- confirm against the build setup.
int main(void) {
  return 0;
}
diff --git a/src/lib/test/test_util.h b/src/lib/test/test_util.h new file mode 100644 index 0000000..8abb99a --- /dev/null +++ b/src/lib/test/test_util.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #pragma once | ||
2 | |||
3 | #include <neuralnet/types.h> | ||
4 | |||
5 | #include <math.h> | ||
6 | |||
7 | // General epsilon for comparing values. | ||
8 | static const R EPS = 1e-10; | ||
9 | |||
10 | // Epsilon for comparing network weights after training. | ||
11 | static const R WEIGHT_EPS = 0.01; | ||
12 | |||
13 | // Epsilon for comparing network outputs after training. | ||
14 | static const R OUTPUT_EPS = 0.01; | ||
15 | |||
// Returns true when `a` and `b` differ by at most `eps` (inclusive bound).
// If the difference is NaN the comparison is false.
static inline bool double_eq(double a, double b, double eps) {
  const double distance = fabs(a - b);
  return distance <= eps;
}
19 | |||
20 | static inline R lerp(R a, R b, R t) { | ||
21 | return a + t*(b-a); | ||
22 | } | ||
diff --git a/src/lib/test/train_linear_perceptron_non_origin_test.c b/src/lib/test/train_linear_perceptron_non_origin_test.c new file mode 100644 index 0000000..5a320ac --- /dev/null +++ b/src/lib/test/train_linear_perceptron_non_origin_test.c | |||
@@ -0,0 +1,67 @@ | |||
1 | #include <neuralnet/train.h> | ||
2 | |||
3 | #include <neuralnet/matrix.h> | ||
4 | #include <neuralnet/neuralnet.h> | ||
5 | #include "activation.h" | ||
6 | #include "neuralnet_impl.h" | ||
7 | |||
8 | #include "test.h" | ||
9 | #include "test_util.h" | ||
10 | |||
11 | #include <assert.h> | ||
12 | |||
13 | TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { | ||
14 | const int num_layers = 1; | ||
15 | const int layer_sizes[] = { 1, 1 }; | ||
16 | const nnActivation layer_activations[] = { nnIdentity }; | ||
17 | |||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
19 | assert(net); | ||
20 | |||
21 | // Train. | ||
22 | |||
23 | // Try to learn the Y = 2X + 1 line. | ||
24 | #define N 2 | ||
25 | const R inputs[N] = { 0., 1. }; | ||
26 | const R targets[N] = { 1., 3. }; | ||
27 | |||
28 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | ||
29 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | ||
30 | nnMatrixInit(&inputs_matrix, inputs); | ||
31 | nnMatrixInit(&targets_matrix, targets); | ||
32 | |||
33 | nnTrainingParams params = { | ||
34 | .learning_rate = 0.7, | ||
35 | .max_iterations = 20, | ||
36 | .seed = 0, | ||
37 | .weight_init = nnWeightInit01, | ||
38 | .debug = false, | ||
39 | }; | ||
40 | |||
41 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | ||
42 | |||
43 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | ||
44 | const R expected_weight = 2.0; | ||
45 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
46 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | ||
47 | |||
48 | const R bias = nnMatrixAt(&net->biases[0], 0, 0); | ||
49 | const R expected_bias = 1.0; | ||
50 | printf("Trained network bias: %f, Expected: %f\n", bias, expected_bias); | ||
51 | TEST_TRUE(double_eq(bias, expected_bias, WEIGHT_EPS)); | ||
52 | |||
53 | // Test. | ||
54 | |||
55 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
56 | |||
57 | const R test_input[] = { 2.3 }; | ||
58 | R test_output[1]; | ||
59 | nnQueryArray(net, query, test_input, test_output); | ||
60 | |||
61 | const R expected_output = test_input[0] * expected_weight + expected_bias; | ||
62 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); | ||
63 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); | ||
64 | |||
65 | nnDeleteQueryObject(&query); | ||
66 | nnDeleteNet(&net); | ||
67 | } | ||
diff --git a/src/lib/test/train_linear_perceptron_test.c b/src/lib/test/train_linear_perceptron_test.c new file mode 100644 index 0000000..2b1336d --- /dev/null +++ b/src/lib/test/train_linear_perceptron_test.c | |||
@@ -0,0 +1,62 @@ | |||
1 | #include <neuralnet/train.h> | ||
2 | |||
3 | #include <neuralnet/matrix.h> | ||
4 | #include <neuralnet/neuralnet.h> | ||
5 | #include "activation.h" | ||
6 | #include "neuralnet_impl.h" | ||
7 | |||
8 | #include "test.h" | ||
9 | #include "test_util.h" | ||
10 | |||
11 | #include <assert.h> | ||
12 | |||
13 | TEST_CASE(neuralnet_train_linear_perceptron_test) { | ||
14 | const int num_layers = 1; | ||
15 | const int layer_sizes[] = { 1, 1 }; | ||
16 | const nnActivation layer_activations[] = { nnIdentity }; | ||
17 | |||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
19 | assert(net); | ||
20 | |||
21 | // Train. | ||
22 | |||
23 | // Try to learn the Y=X line. | ||
24 | #define N 2 | ||
25 | const R inputs[N] = { 0., 1. }; | ||
26 | const R targets[N] = { 0., 1. }; | ||
27 | |||
28 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | ||
29 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | ||
30 | nnMatrixInit(&inputs_matrix, inputs); | ||
31 | nnMatrixInit(&targets_matrix, targets); | ||
32 | |||
33 | nnTrainingParams params = { | ||
34 | .learning_rate = 0.7, | ||
35 | .max_iterations = 10, | ||
36 | .seed = 0, | ||
37 | .weight_init = nnWeightInit01, | ||
38 | .debug = false, | ||
39 | }; | ||
40 | |||
41 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | ||
42 | |||
43 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | ||
44 | const R expected_weight = 1.0; | ||
45 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
46 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | ||
47 | |||
48 | // Test. | ||
49 | |||
50 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
51 | |||
52 | const R test_input[] = { 2.3 }; | ||
53 | R test_output[1]; | ||
54 | nnQueryArray(net, query, test_input, test_output); | ||
55 | |||
56 | const R expected_output = test_input[0]; | ||
57 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); | ||
58 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); | ||
59 | |||
60 | nnDeleteQueryObject(&query); | ||
61 | nnDeleteNet(&net); | ||
62 | } | ||
diff --git a/src/lib/test/train_sigmoid_test.c b/src/lib/test/train_sigmoid_test.c new file mode 100644 index 0000000..588e7ca --- /dev/null +++ b/src/lib/test/train_sigmoid_test.c | |||
@@ -0,0 +1,66 @@ | |||
1 | #include <neuralnet/train.h> | ||
2 | |||
3 | #include <neuralnet/matrix.h> | ||
4 | #include <neuralnet/neuralnet.h> | ||
5 | #include "activation.h" | ||
6 | #include "neuralnet_impl.h" | ||
7 | |||
8 | #include "test.h" | ||
9 | #include "test_util.h" | ||
10 | |||
11 | #include <assert.h> | ||
12 | |||
13 | TEST_CASE(neuralnet_train_sigmoid_test) { | ||
14 | const int num_layers = 1; | ||
15 | const int layer_sizes[] = { 1, 1 }; | ||
16 | const nnActivation layer_activations[] = { nnSigmoid }; | ||
17 | |||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
19 | assert(net); | ||
20 | |||
21 | // Train. | ||
22 | |||
23 | // Try to learn the sigmoid function. | ||
24 | #define N 3 | ||
25 | R inputs[N]; | ||
26 | R targets[N]; | ||
27 | for (int i = 0; i < N; ++i) { | ||
28 | inputs[i] = lerp(-1, +1, (R)i / (R)(N-1)); | ||
29 | targets[i] = sigmoid(inputs[i]); | ||
30 | } | ||
31 | |||
32 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | ||
33 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | ||
34 | nnMatrixInit(&inputs_matrix, inputs); | ||
35 | nnMatrixInit(&targets_matrix, targets); | ||
36 | |||
37 | nnTrainingParams params = { | ||
38 | .learning_rate = 0.9, | ||
39 | .max_iterations = 100, | ||
40 | .seed = 0, | ||
41 | .weight_init = nnWeightInit01, | ||
42 | .debug = false, | ||
43 | }; | ||
44 | |||
45 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | ||
46 | |||
47 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | ||
48 | const R expected_weight = 1.0; | ||
49 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
50 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | ||
51 | |||
52 | // Test. | ||
53 | |||
54 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
55 | |||
56 | const R test_input[] = { 0.3 }; | ||
57 | R test_output[1]; | ||
58 | nnQueryArray(net, query, test_input, test_output); | ||
59 | |||
60 | const R expected_output = 0.574442516811659; // sigmoid(0.3) | ||
61 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); | ||
62 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); | ||
63 | |||
64 | nnDeleteQueryObject(&query); | ||
65 | nnDeleteNet(&net); | ||
66 | } | ||
diff --git a/src/lib/test/train_xor_test.c b/src/lib/test/train_xor_test.c new file mode 100644 index 0000000..6ddc6e0 --- /dev/null +++ b/src/lib/test/train_xor_test.c | |||
@@ -0,0 +1,66 @@ | |||
1 | #include <neuralnet/train.h> | ||
2 | |||
3 | #include <neuralnet/matrix.h> | ||
4 | #include <neuralnet/neuralnet.h> | ||
5 | #include "activation.h" | ||
6 | #include "neuralnet_impl.h" | ||
7 | |||
8 | #include "test.h" | ||
9 | #include "test_util.h" | ||
10 | |||
11 | #include <assert.h> | ||
12 | |||
13 | TEST_CASE(neuralnet_train_xor_test) { | ||
14 | const int num_layers = 2; | ||
15 | const int layer_sizes[] = { 2, 2, 1 }; | ||
16 | const nnActivation layer_activations[] = { nnRelu, nnIdentity }; | ||
17 | |||
18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
19 | assert(net); | ||
20 | |||
21 | // Train. | ||
22 | |||
23 | #define N 4 | ||
24 | const R inputs[N][2] = { { 0., 0. }, { 0., 1. }, { 1., 0. }, { 1., 1. } }; | ||
25 | const R targets[N] = { 0., 1., 1., 0. }; | ||
26 | |||
27 | nnMatrix inputs_matrix = nnMatrixMake(N, 2); | ||
28 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | ||
29 | nnMatrixInit(&inputs_matrix, (const R*)inputs); | ||
30 | nnMatrixInit(&targets_matrix, targets); | ||
31 | |||
32 | nnTrainingParams params = { | ||
33 | .learning_rate = 0.1, | ||
34 | .max_iterations = 500, | ||
35 | .seed = 0, | ||
36 | .weight_init = nnWeightInit01, | ||
37 | .debug = false, | ||
38 | }; | ||
39 | |||
40 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | ||
41 | |||
42 | // Test. | ||
43 | |||
44 | #define M 4 | ||
45 | |||
46 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); | ||
47 | |||
48 | const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; | ||
49 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); | ||
50 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); | ||
51 | nnQuery(net, query, &test_inputs_matrix); | ||
52 | |||
53 | const R expected_outputs[M] = { 0., 1., 1., 0. }; | ||
54 | for (int i = 0; i < M; ++i) { | ||
55 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | ||
56 | printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", | ||
57 | test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); | ||
58 | } | ||
59 | for (int i = 0; i < M; ++i) { | ||
60 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | ||
61 | TEST_TRUE(double_eq(test_output, expected_outputs[i], OUTPUT_EPS)); | ||
62 | } | ||
63 | |||
64 | nnDeleteQueryObject(&query); | ||
65 | nnDeleteNet(&net); | ||
66 | } | ||