/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
MIT License

Copyright (c) 2019 Wei Chen and Andrew Ferguson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
#include "function/Function.h"
#include "function/ActionRegister.h"
#include <cassert>

#include <string>
#include <cmath>
#include <iostream>
#include <cstdio>   // for printf() used in the debug blocks below

using namespace std;

// #define DEBUG
// #define DEBUG_2
// #define DEBUG_3

namespace PLMD {
namespace function {
namespace annfunc {

//+PLUMEDOC ANNMOD_Function ANN
/*
Calculates the ANN-function.

This module implements the ANN class, a subclass of the Function class.
The ANN class takes multi-dimensional arrays as inputs for a fully-connected feedforward neural network
with specified neural network weights, and generates the corresponding outputs.
The ANN outputs can be used as collective variables, as inputs for other collective variables,
or as inputs for data analysis tools.

\par Examples

Assume we have an ANN with the numbers of nodes being [2, 3, 1]. The weights connecting layers 0 and 1 are

[[1,2], [3,4], [5,6]]

the weights connecting layers 1 and 2 are

[[7,8,9]]

and the biases for layers 1 and 2 are [10, 11, 12] and [13], respectively.

All activation functions are Tanh.

Then if the input variables are l_0_out_0, l_0_out_1, the corresponding ANN function object can be defined with the following plumed script:

\plumedfile
ANN ...
LABEL=ann
ARG=l_0_out_0,l_0_out_1
NUM_LAYERS=3
NUM_NODES=2,3,1
ACTIVATIONS=Tanh,Tanh
WEIGHTS0=1,2,3,4,5,6
WEIGHTS1=7,8,9
BIASES0=10,11,12
BIASES1=13
... ANN
\endplumedfile
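
As a hand-worked illustration of the conventions above (derived from the keywords in this example,
not additional functionality): each node computes the weighted sum of its inputs plus its bias and
then applies its activation, so the hidden layer is

h_0 = tanh(1 * l_0_out_0 + 2 * l_0_out_1 + 10)
h_1 = tanh(3 * l_0_out_0 + 4 * l_0_out_1 + 11)
h_2 = tanh(5 * l_0_out_0 + 6 * l_0_out_1 + 12)

and the single output component is

node-0 = tanh(7 * h_0 + 8 * h_1 + 9 * h_2 + 13)

In particular, each flattened WEIGHTS array is read in row-major order: entry jj of WEIGHTSn is the
weight connecting node (jj / num_cols) of layer n+1 to node (jj % num_cols) of layer n.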

To access its components, we use "ann.node-0", "ann.node-1", ..., which represent the components of the neural network outputs.

*/
//+ENDPLUMEDOC

class ANN : public Function
{
private:
  int num_layers;
  vector<int> num_nodes;
  vector<string> activations;       // activation functions
  vector<vector<double> > weights;  // flattened weight arrays
  vector<vector<double> > biases;
  vector<vector<double> > output_of_each_layer;
  vector<vector<double> > input_of_each_layer;
  vector<double** > coeff;          // weight matrix arrays, reshaped from "weights"

public:
  static void registerKeywords( Keywords& keys );
  explicit ANN(const ActionOptions&);
  virtual void calculate();
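  // destructor releasing the manually allocated coeff matrices
  // (a minimal sketch; defined after the constructor below)
  virtual ~ANN();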
  void calculate_output_of_each_layer(const vector<double>& input);
  void back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component);
};

PLUMED_REGISTER_ACTION(ANN,"ANN")

void ANN::registerKeywords( Keywords& keys ) {
  Function::registerKeywords(keys);
  keys.use("ARG"); keys.use("PERIODIC");
  keys.add("compulsory", "NUM_LAYERS", "number of layers of the neural network");
  keys.add("compulsory", "NUM_NODES", "numbers of nodes in each layer of the neural network");
  keys.add("compulsory", "ACTIVATIONS", "activation functions for the neural network");
  keys.add("numbered", "WEIGHTS", "flattened weight arrays connecting adjacent layers, "
           "WEIGHTS0 represents the flattened weight array connecting layer 0 and layer 1, "
           "WEIGHTS1 represents the flattened weight array connecting layer 1 and layer 2, ...");
  keys.add("numbered", "BIASES", "bias array for each layer of the neural network, "
           "BIASES0 represents the bias array for layer 1, BIASES1 represents the bias array for layer 2, ...");
  // since v2.2 plumed requires all components to be registered
  keys.addOutputComponent("node", "default", "components of ANN outputs");
}

ANN::ANN(const ActionOptions&ao):
  Action(ao),
  Function(ao)
{
  parse("NUM_LAYERS", num_layers);
  num_nodes = vector<int>(num_layers);
  activations = vector<string>(num_layers - 1);
  output_of_each_layer = vector<vector<double> >(num_layers);
  input_of_each_layer = vector<vector<double> >(num_layers);
  coeff = vector<double** >(num_layers - 1);
  parseVector("NUM_NODES", num_nodes);
  parseVector("ACTIVATIONS", activations);
  log.printf("\nactivations = ");
  for (auto ss: activations) {
    log.printf("%s, ", ss.c_str());
  }
  log.printf("\nnum_nodes = ");
  for (auto ss: num_nodes) {
    log.printf("%d, ", ss);
  }
  vector<double> temp_single_coeff, temp_single_bias;
  for (int ii = 0; ; ii ++) {
    // parse coeff; stop at the first missing numbered WEIGHTS keyword
    if( !parseNumberedVector("WEIGHTS", ii, temp_single_coeff) ) {
      break;
    }
    weights.push_back(temp_single_coeff);
    log.printf("size of temp_single_coeff = %lu\n", temp_single_coeff.size());
    log.printf("size of weights = %lu\n", weights.size());
    // parse bias; each weight array needs a matching bias array
    if( !parseNumberedVector("BIASES", ii, temp_single_bias) ) {
      error("BIASES" + to_string(ii) + " is missing");
    }
    biases.push_back(temp_single_bias);
    log.printf("size of temp_single_bias = %lu\n", temp_single_bias.size());
    log.printf("size of biases = %lu\n", biases.size());
  }

  if(getNumberOfArguments() != (unsigned) num_nodes[0]) {
    error("Number of arguments is wrong");
  }

  auto temp_coeff = weights;
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    int num_of_rows, num_of_cols; // num of rows/cols for the coeff matrix of this connection
    num_of_rows = num_nodes[ii + 1];
    num_of_cols = num_nodes[ii];
    assert (num_of_rows * num_of_cols == (int) temp_coeff[ii].size()); // check whether the size matches
    // create a 2d array to hold coefficients
    coeff[ii] = new double*[num_of_rows];
    for (int kk = 0; kk < num_of_rows; kk ++) {
      coeff[ii][kk] = new double[num_of_cols];
    }
    for (int jj = 0; jj < (int) temp_coeff[ii].size(); jj ++) {
      coeff[ii][jj / num_of_cols][jj % num_of_cols] = temp_coeff[ii][jj];
    }
  }
  // check coeff
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    log.printf("coeff %d = \n", ii);
    for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
      for (int kk = 0; kk < num_nodes[ii]; kk ++) {
        log.printf("%f ", coeff[ii][jj][kk]);
      }
      log.printf("\n");
    }
  }
  // check bias
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    log.printf("bias %d = \n", ii);
    for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
      log.printf("%f ", biases[ii][jj]);
    }
    log.printf("\n");
  }
  log.printf("initialization ended\n");
  // create components
  for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
    string name_of_this_component = "node-" + to_string(ii);
    addComponentWithDerivatives(name_of_this_component);
    componentIsNotPeriodic(name_of_this_component);
  }
  checkRead();
}
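
// A minimal sketch of a destructor releasing the coeff matrices allocated
// with new[] in the constructor; it assumes construction completed, so every
// coeff[ii] and coeff[ii][kk] points at a valid array.
ANN::~ANN() {
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    for (int kk = 0; kk < num_nodes[ii + 1]; kk ++) {
      delete[] coeff[ii][kk];
    }
    delete[] coeff[ii];
  }
}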

void ANN::calculate_output_of_each_layer(const vector<double>& input) {
  // first layer
  output_of_each_layer[0] = input;
  // following layers
  for(int ii = 1; ii < (int) num_nodes.size(); ii ++) {
    output_of_each_layer[ii].resize(num_nodes[ii]);
    input_of_each_layer[ii].resize(num_nodes[ii]);
    // first calculate input
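    // the input of layer ii is the affine map
    //   input[ii] = coeff[ii - 1] * output[ii - 1] + biases[ii - 1],
    // accumulated entry by entry in the double loop below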
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      input_of_each_layer[ii][jj] = biases[ii - 1][jj];  // add bias term
      for (int kk = 0; kk < num_nodes[ii - 1]; kk ++) {
        input_of_each_layer[ii][jj] += coeff[ii - 1][jj][kk] * output_of_each_layer[ii - 1][kk];
      }
    }
    // then get output
    if (activations[ii - 1] == string("Linear")) {
      for(int jj = 0; jj < num_nodes[ii]; jj ++) {
        output_of_each_layer[ii][jj] = input_of_each_layer[ii][jj];
      }
    }
    else if (activations[ii - 1] == string("Tanh")) {
      for(int jj = 0; jj < num_nodes[ii]; jj ++) {
        output_of_each_layer[ii][jj] = tanh(input_of_each_layer[ii][jj]);
      }
    }
    else if (activations[ii - 1] == string("Circular")) {
      assert (num_nodes[ii] % 2 == 0);
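      // a Circular layer treats consecutive pairs (x_p, x_q) of its inputs as
      // 2d points and projects each onto the unit circle:
      //   output = (x_p / r, x_q / r),  r = sqrt(x_p^2 + x_q^2)
      // (r is assumed to be nonzero)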
      for(int jj = 0; jj < num_nodes[ii] / 2; jj ++) {
        double radius = sqrt(input_of_each_layer[ii][2 * jj] * input_of_each_layer[ii][2 * jj]
                             + input_of_each_layer[ii][2 * jj + 1] * input_of_each_layer[ii][2 * jj + 1]);
        output_of_each_layer[ii][2 * jj] = input_of_each_layer[ii][2 * jj] / radius;
        output_of_each_layer[ii][2 * jj + 1] = input_of_each_layer[ii][2 * jj + 1] / radius;
      }
    }
    else {
      error("activation function " + activations[ii - 1] + " not found");
    }
  }
#ifdef DEBUG_2
  // print out the result for debugging
  printf("output_of_each_layer = \n");
  // for (int ii = num_layers - 1; ii < num_layers; ii ++) {
  for (int ii = 0; ii < num_layers; ii ++) {
    printf("layer[%d]: ", ii);
    if (ii != 0) {
      cout << activations[ii - 1] << "\t";
    }
    else {
      cout << "input \t";
    }
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      printf("%lf\t", output_of_each_layer[ii][jj]);
    }
    printf("\n");
  }
  printf("\n");
#endif
  return;
}

void ANN::back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component) {
  derivatives_of_each_layer = output_of_each_layer;  // the data structure and size should be the same, so I simply deep copy it
  // first calculate derivatives for the output layer
  for (int ii = 0; ii < num_nodes[num_nodes.size() - 1]; ii ++ ) {
    if (ii == index_of_output_component) {
      derivatives_of_each_layer[num_nodes.size() - 1][ii] = 1;
    }
    else {
      derivatives_of_each_layer[num_nodes.size() - 1][ii] = 0;
    }
  }
  // then use back propagation to calculate derivatives for the previous layers
  for (int jj = (int) num_nodes.size() - 2; jj >= 0; jj --) {
    if (activations[jj] == string("Circular")) {
      vector<double> temp_derivative_of_input_for_this_layer;
      temp_derivative_of_input_for_this_layer.resize(num_nodes[jj + 1]);
#ifdef DEBUG
      assert (num_nodes[jj + 1] % 2 == 0);
#endif
      // first calculate the derivative of the input from the derivative of the output of this circular layer
      for(int ii = 0; ii < num_nodes[jj + 1] / 2; ii ++) {
        // printf("size of input_of_each_layer[%d] = %d\n", jj, input_of_each_layer[jj].size());
        double x_p = input_of_each_layer[jj + 1][2 * ii];
        double x_q = input_of_each_layer[jj + 1][2 * ii + 1];
        double radius = sqrt(x_p * x_p + x_q * x_q);
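        // the pair's outputs are (x_p / r, x_q / r), so by the quotient rule
        //   d(x_p/r)/dx_p =  x_q^2 / r^3,   d(x_p/r)/dx_q = -x_p x_q / r^3
        //   d(x_q/r)/dx_q =  x_p^2 / r^3,   d(x_q/r)/dx_p = -x_p x_q / r^3
        // the two factored expressions below apply the chain rule with these terms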
        temp_derivative_of_input_for_this_layer[2 * ii] = x_q / (radius * radius * radius)
                                                          * (x_q * derivatives_of_each_layer[jj + 1][2 * ii]
                                                             - x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]);
        temp_derivative_of_input_for_this_layer[2 * ii + 1] = x_p / (radius * radius * radius)
                                                              * (x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]
                                                                 - x_q * derivatives_of_each_layer[jj + 1][2 * ii]);
      }
#ifdef DEBUG
      for (int mm = 0; mm < num_nodes[jj + 1]; mm ++) {
        printf("temp_derivative_of_input_for_this_layer[%d] = %lf\n", mm, temp_derivative_of_input_for_this_layer[mm]);
        printf("derivatives_of_each_layer[%d + 1][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj + 1][mm]);
      }
#endif
      // then calculate the derivative of the output of layer jj from the derivative of the input of layer (jj + 1)
      for (int mm = 0; mm < num_nodes[jj]; mm ++) {
        derivatives_of_each_layer[jj][mm] = 0;
        for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
          derivatives_of_each_layer[jj][mm] += coeff[jj][kk][mm]
                                               * temp_derivative_of_input_for_this_layer[kk];
#ifdef DEBUG
          printf("derivatives_of_each_layer[%d][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj][mm]);
          printf("coeff[%d][%d][%d] = %lf\n", jj, kk, mm, coeff[jj][kk][mm]);
#endif
        }
      }
      // TODO: should be fine, passes all tests, although there seemed to be some problems here previously
    }
    else {
      for (int mm = 0; mm < num_nodes[jj]; mm ++) {
        derivatives_of_each_layer[jj][mm] = 0;
        for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
          if (activations[jj] == string("Tanh")) {
            // printf("tanh\n");
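            // d/dx tanh(x) = 1 - tanh(x)^2; output_of_each_layer[jj + 1][kk]
            // already stores tanh(x), hence the (1 - out * out) factor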
            derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk]
                                                 * coeff[jj][kk][mm]
                                                 * (1 - output_of_each_layer[jj + 1][kk] * output_of_each_layer[jj + 1][kk]);
          }
          else if (activations[jj] == string("Linear")) {
            // printf("linear\n");
            derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk]
                                                 * coeff[jj][kk][mm];
          }
          else {
            error("activation function " + activations[jj] + " not found");
          }
        }
      }
    }
  }
#ifdef DEBUG
  // print out the result for debugging
  printf("derivatives_of_each_layer = \n");
  for (int ii = 0; ii < num_layers; ii ++) {
    printf("layer[%d]: ", ii);
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      printf("%lf\t", derivatives_of_each_layer[ii][jj]);
    }
    printf("\n");
  }
  printf("\n");
#endif
  return;
}

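// calculate() runs one forward pass through the network and then, for each
// output component, one backward pass to obtain d(node-ii)/d(argument),
// which is stored with setDerivative()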
void ANN::calculate() {

  vector<double> input_layer_data(num_nodes[0]);
  for (int ii = 0; ii < num_nodes[0]; ii ++) {
    input_layer_data[ii] = getArgument(ii);
  }

  calculate_output_of_each_layer(input_layer_data);
  vector<vector<double> > derivatives_of_each_layer;

  for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
    back_prop(derivatives_of_each_layer, ii);
    string name_of_this_component = "node-" + to_string(ii);
    Value* value_new = getPntrToComponent(name_of_this_component);
    value_new -> set(output_of_each_layer[num_layers - 1][ii]);
    for (int jj = 0; jj < num_nodes[0]; jj ++) {
      value_new -> setDerivative(jj, derivatives_of_each_layer[0][jj]); // TODO: setDerivative or addDerivative?
    }
#ifdef DEBUG_3
    printf("derivatives = ");
    for (int jj = 0; jj < num_nodes[0]; jj ++) {
      printf("%f ", value_new -> getDerivative(jj));
    }
    printf("\n");
#endif
  }

}

}
}
}