/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
MIT License

Copyright (c) 2019 Wei Chen and Andrew Ferguson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
#include "function/Function.h"
#include "core/ActionRegister.h"

#include <cassert>
#include <string>
#include <cmath>
#include <iostream>
#include <cstdio>   // printf is used in the debug and error paths below

using namespace std;

// #define DEBUG
// #define DEBUG_2
// #define DEBUG_3

namespace PLMD {
namespace function {
namespace annfunc {

//+PLUMEDOC ANNMOD_Function ANN
/*
Calculates the ANN function.

This module implements the ANN class, a subclass of the Function class.
The ANN class takes multi-dimensional arrays as inputs to a fully-connected feedforward neural network
with specified weights and biases, and generates the corresponding outputs.
The ANN outputs can be used as collective variables, as inputs to other collective variables,
or as inputs to data analysis tools.

\par Examples

Assume we have an ANN with layer sizes [2, 3, 1], where the weights connecting layers 0 and 1 are

[[1,2], [3,4], [5,6]]

the weights connecting layers 1 and 2 are

[[7,8,9]]

and the biases for layers 1 and 2 are [10, 11, 12] and [13], respectively.

All activation functions are Tanh.

If the input variables are l_0_out_0 and l_0_out_1, the corresponding ANN function object can be defined with the
following PLUMED script:

\plumedfile
ANN ...
LABEL=ann
ARG=l_0_out_0,l_0_out_1
NUM_LAYERS=3
NUM_NODES=2,3,1
ACTIVATIONS=Tanh,Tanh
WEIGHTS0=1,2,3,4,5,6
WEIGHTS1=7,8,9
BIASES0=10,11,12
BIASES1=13
... ANN
\endplumedfile

Its components are accessed as "ann.node-0", "ann.node-1", ..., which represent the individual outputs of the neural network.
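
As a sketch of the computation this example defines (using the weights and biases above):
with inputs x_0 = l_0_out_0 and x_1 = l_0_out_1, the hidden-layer outputs are

h_j = tanh(W0[j][0] * x_0 + W0[j][1] * x_1 + b0[j]),  j = 0, 1, 2

where W0 = [[1,2],[3,4],[5,6]] and b0 = [10,11,12] (the flattened WEIGHTS0 array is read row by row),
and the single output component is

ann.node-0 = tanh(7 * h_0 + 8 * h_1 + 9 * h_2 + 13)

The output can then be used like any other value, for example PRINT ARG=ann.node-0 FILE=colvar.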

*/
//+ENDPLUMEDOC

class ANN : public Function {
private:
  int num_layers;
  vector<int> num_nodes;
  vector<string> activations;                   // activation function for each connection between layers
  vector<vector<double> > weights;              // flattened weight arrays, one per connection
  vector<vector<double> > biases;               // bias array for each layer after the input layer
  vector<vector<double> > output_of_each_layer;
  vector<vector<double> > input_of_each_layer;  // pre-activation values, kept for back-propagation
  vector<double** > coeff;                      // weight matrix arrays, reshaped from "weights"

public:
  static void registerKeywords( Keywords& keys );
  explicit ANN(const ActionOptions&);
  virtual void calculate();
  void calculate_output_of_each_layer(const vector<double>& input);
  void back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component);
};

PLUMED_REGISTER_ACTION(ANN,"ANN")

void ANN::registerKeywords( Keywords& keys ) {
  Function::registerKeywords(keys);
  keys.use("PERIODIC");
  keys.add("compulsory", "NUM_LAYERS", "number of layers of the neural network");
  keys.add("compulsory", "NUM_NODES", "numbers of nodes in each layer of the neural network");
  keys.add("compulsory", "ACTIVATIONS", "activation functions for the neural network");
  keys.add("numbered", "WEIGHTS", "flattened weight arrays connecting adjacent layers; "
           "WEIGHTS0 is the flattened weight array connecting layer 0 and layer 1, "
           "WEIGHTS1 is the flattened weight array connecting layer 1 and layer 2, ...");
  keys.add("numbered", "BIASES", "bias arrays for the layers of the neural network; "
           "BIASES0 is the bias array for layer 1, BIASES1 is the bias array for layer 2, ...");
  // since v2.2 plumed requires all components to be registered
  keys.addOutputComponent("node", "default", "scalar", "components of ANN outputs");
}

ANN::ANN(const ActionOptions&ao):
  Action(ao),
  Function(ao) {
  parse("NUM_LAYERS", num_layers);
  num_nodes = vector<int>(num_layers);
  activations = vector<string>(num_layers - 1);
  output_of_each_layer = vector<vector<double> >(num_layers);
  input_of_each_layer = vector<vector<double> >(num_layers);
  coeff = vector<double** >(num_layers - 1);
  parseVector("NUM_NODES", num_nodes);
  parseVector("ACTIVATIONS", activations);
  log.printf("\nactivations = ");
  for (const auto & ss: activations) {
    log.printf("%s, ", ss.c_str());
  }
  log.printf("\nnum_nodes = ");
  for (auto ss: num_nodes) {
    log.printf("%d, ", ss);
  }
  vector<double> temp_single_coeff, temp_single_bias;
  for (int ii = 0; ; ii ++) {
    // parse coeff; stop when no more numbered WEIGHTS keywords are found
    if( !parseNumberedVector("WEIGHTS", ii, temp_single_coeff) ) {
      break;
    }
    weights.push_back(temp_single_coeff);
    log.printf("size of temp_single_coeff = %lu\n", temp_single_coeff.size());
    log.printf("size of weights = %lu\n", weights.size());
    // parse bias; if BIASESii is absent, reuse the previous bias array
    // (this fallback requires that BIASES0 be present)
    if( !parseNumberedVector("BIASES", ii, temp_single_bias) ) {
      temp_single_bias = biases[ii - 1];
    }
    biases.push_back(temp_single_bias);
    log.printf("size of temp_single_bias = %lu\n", temp_single_bias.size());
    log.printf("size of biases = %lu\n", biases.size());
  }

  if(getNumberOfArguments() != num_nodes[0]) {
    error("number of arguments does not match the number of input nodes num_nodes[0]");
  }

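  // Reshape each flattened weight vector into a num_nodes[ii+1] x num_nodes[ii] matrix,
  // stored row-major: row jj holds the weights feeding node jj of layer ii+1.  For the
  // documented example (NUM_NODES=2,3,1), WEIGHTS0=1,2,3,4,5,6 becomes [[1,2],[3,4],[5,6]].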
  auto temp_coeff = weights;
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    int num_of_rows, num_of_cols;  // number of rows/cols of the coeff matrix for this connection
    num_of_rows = num_nodes[ii + 1];
    num_of_cols = num_nodes[ii];
    assert (num_of_rows * num_of_cols == temp_coeff[ii].size());  // check that the size matches
    // create a 2d array to hold coefficients
    coeff[ii] = new double*[num_of_rows];
    for (int kk = 0; kk < num_of_rows; kk ++) {
      coeff[ii][kk] = new double[num_of_cols];
    }
    for (int jj = 0; jj < temp_coeff[ii].size(); jj ++) {
      coeff[ii][jj / num_of_cols][jj % num_of_cols] = temp_coeff[ii][jj];
    }
  }
  // check coeff
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    log.printf("coeff %d = \n", ii);
    for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
      for (int kk = 0; kk < num_nodes[ii]; kk ++) {
        log.printf("%f ", coeff[ii][jj][kk]);
      }
      log.printf("\n");
    }
  }
  // check bias
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    log.printf("bias %d = \n", ii);
    for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
      log.printf("%f ", biases[ii][jj]);
    }
    log.printf("\n");
  }
  log.printf("initialization ended\n");
  // create components
  for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
    string name_of_this_component = "node-" + to_string(ii);
    addComponentWithDerivatives(name_of_this_component);
    componentIsNotPeriodic(name_of_this_component);
  }
  checkRead();
}

void ANN::calculate_output_of_each_layer(const vector<double>& input) {
  // first layer
  output_of_each_layer[0] = input;
  // following layers
  for(int ii = 1; ii < num_nodes.size(); ii ++) {
    output_of_each_layer[ii].resize(num_nodes[ii]);
    input_of_each_layer[ii].resize(num_nodes[ii]);
    // first calculate the input (pre-activation values)
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      input_of_each_layer[ii][jj] = biases[ii - 1][jj];  // add bias term
      for (int kk = 0; kk < num_nodes[ii - 1]; kk ++) {
        input_of_each_layer[ii][jj] += coeff[ii - 1][jj][kk] * output_of_each_layer[ii - 1][kk];
      }
    }
    // then apply the activation function to get the output
    if (activations[ii - 1] == string("Linear")) {
      for(int jj = 0; jj < num_nodes[ii]; jj ++) {
        output_of_each_layer[ii][jj] = input_of_each_layer[ii][jj];
      }
    } else if (activations[ii - 1] == string("Tanh")) {
      for(int jj = 0; jj < num_nodes[ii]; jj ++) {
        output_of_each_layer[ii][jj] = tanh(input_of_each_layer[ii][jj]);
      }
    } else if (activations[ii - 1] == string("Circular")) {
      assert (num_nodes[ii] % 2 == 0);
      // project each pair of nodes onto the unit circle
      for(int jj = 0; jj < num_nodes[ii] / 2; jj ++) {
        double radius = sqrt(input_of_each_layer[ii][2 * jj] * input_of_each_layer[ii][2 * jj]
                             + input_of_each_layer[ii][2 * jj + 1] * input_of_each_layer[ii][2 * jj + 1]);
        output_of_each_layer[ii][2 * jj] = input_of_each_layer[ii][2 * jj] / radius;
        output_of_each_layer[ii][2 * jj + 1] = input_of_each_layer[ii][2 * jj + 1] / radius;
      }
    } else {
      printf("layer type not found!\n\n");
      return;
    }
  }
#ifdef DEBUG_2
  // print out the result for debugging
  printf("output_of_each_layer = \n");
  for (int ii = 0; ii < num_layers; ii ++) {
    printf("layer[%d]: ", ii);
    if (ii != 0) {
      cout << activations[ii - 1] << "\t";
    } else {
      cout << "input \t";
    }
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      printf("%lf\t", output_of_each_layer[ii][jj]);
    }
    printf("\n");
  }
  printf("\n");
#endif
  return;
}
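
/*
  Notes on the "Circular" branch of back_prop() below (a sketch of the derivation,
  following the forward pass above): each pair of pre-activations (x_p, x_q) is
  normalized as out_p = x_p / r and out_q = x_q / r with r = sqrt(x_p^2 + x_q^2).
  The partial derivatives are

    d(out_p)/d(x_p) =  x_q^2 / r^3        d(out_p)/d(x_q) = -x_p * x_q / r^3
    d(out_q)/d(x_q) =  x_p^2 / r^3        d(out_q)/d(x_p) = -x_p * x_q / r^3

  so the chain rule gives the derivatives with respect to the pre-activations as

    d_in_p = (x_q / r^3) * (x_q * d_out_p - x_p * d_out_q)
    d_in_q = (x_p / r^3) * (x_p * d_out_q - x_q * d_out_p)

  which is exactly what temp_derivative_of_input_for_this_layer stores.
*/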

void ANN::back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component) {
  derivatives_of_each_layer = output_of_each_layer;  // same data structure and size, so simply deep copy it
  // first set the derivatives for the output layer
  for (int ii = 0; ii < num_nodes[num_nodes.size() - 1]; ii ++ ) {
    if (ii == index_of_output_component) {
      derivatives_of_each_layer[num_nodes.size() - 1][ii] = 1;
    } else {
      derivatives_of_each_layer[num_nodes.size() - 1][ii] = 0;
    }
  }
  // then use back propagation to calculate derivatives for the previous layers
  for (int jj = num_nodes.size() - 2; jj >= 0; jj --) {
    if (activations[jj] == string("Circular")) {
      vector<double> temp_derivative_of_input_for_this_layer;
      temp_derivative_of_input_for_this_layer.resize(num_nodes[jj + 1]);
#ifdef DEBUG
      assert (num_nodes[jj + 1] % 2 == 0);
#endif
      // first calculate the derivative with respect to the input from the derivative
      // with respect to the output of this circular layer
      for(int ii = 0; ii < num_nodes[jj + 1] / 2; ii ++) {
        double x_p = input_of_each_layer[jj + 1][2 * ii];
        double x_q = input_of_each_layer[jj + 1][2 * ii + 1];
        double radius = sqrt(x_p * x_p + x_q * x_q);
        temp_derivative_of_input_for_this_layer[2 * ii] = x_q / (radius * radius * radius)
            * (x_q * derivatives_of_each_layer[jj + 1][2 * ii]
               - x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]);
        temp_derivative_of_input_for_this_layer[2 * ii + 1] = x_p / (radius * radius * radius)
            * (x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]
               - x_q * derivatives_of_each_layer[jj + 1][2 * ii]);
      }
#ifdef DEBUG
      for (int mm = 0; mm < num_nodes[jj + 1]; mm ++) {
        printf("temp_derivative_of_input_for_this_layer[%d] = %lf\n", mm, temp_derivative_of_input_for_this_layer[mm]);
        printf("derivatives_of_each_layer[%d + 1][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj + 1][mm]);
      }
#endif
      // then calculate the derivative of the output of layer jj from the derivative
      // of the input of layer (jj + 1)
      for (int mm = 0; mm < num_nodes[jj]; mm ++) {
        derivatives_of_each_layer[jj][mm] = 0;
        for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
          derivatives_of_each_layer[jj][mm] += coeff[jj][kk][mm]
                                               * temp_derivative_of_input_for_this_layer[kk];
#ifdef DEBUG
          printf("derivatives_of_each_layer[%d][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj][mm]);
          printf("coeff[%d][%d][%d] = %lf\n", jj, kk, mm, coeff[jj][kk][mm]);
#endif
        }
      }
      // TODO: should be fine, passes all tests, although there seemed to be some problems here previously
    } else {
      for (int mm = 0; mm < num_nodes[jj]; mm ++) {
        derivatives_of_each_layer[jj][mm] = 0;
        for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
          if (activations[jj] == string("Tanh")) {
            // d(tanh(x))/dx = 1 - tanh(x)^2
            derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk]
                                                 * coeff[jj][kk][mm]
                                                 * (1 - output_of_each_layer[jj + 1][kk] * output_of_each_layer[jj + 1][kk]);
          } else if (activations[jj] == string("Linear")) {
            derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk]
                                                 * coeff[jj][kk][mm];
          } else {
            printf("layer type not found!\n\n");
            return;
          }
        }
      }
    }
  }
#ifdef DEBUG
  // print out the result for debugging
  printf("derivatives_of_each_layer = \n");
  for (int ii = 0; ii < num_layers; ii ++) {
    printf("layer[%d]: ", ii);
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      printf("%lf\t", derivatives_of_each_layer[ii][jj]);
    }
    printf("\n");
  }
  printf("\n");
#endif
  return;
}

void ANN::calculate() {

  vector<double> input_layer_data(num_nodes[0]);
  for (int ii = 0; ii < num_nodes[0]; ii ++) {
    input_layer_data[ii] = getArgument(ii);
  }

  calculate_output_of_each_layer(input_layer_data);
  vector<vector<double> > derivatives_of_each_layer;

  for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
    back_prop(derivatives_of_each_layer, ii);
    string name_of_this_component = "node-" + to_string(ii);
    Value* value_new = getPntrToComponent(name_of_this_component);
    value_new -> set(output_of_each_layer[num_layers - 1][ii]);
    for (int jj = 0; jj < num_nodes[0]; jj ++) {
      value_new -> setDerivative(jj, derivatives_of_each_layer[0][jj]);  // TODO: setDerivative or addDerivative?
    }
#ifdef DEBUG_3
    printf("derivatives = ");
    for (int jj = 0; jj < num_nodes[0]; jj ++) {
      printf("%f ", value_new -> getDerivative(jj));
    }
    printf("\n");
#endif
  }

}
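
/*
  A possible sanity check for the analytic derivatives above (a sketch, not part of
  the module): compare each setDerivative() value against a central finite difference
  of the forward pass. The helper below is hypothetical standalone code, assuming a
  function `forward` that maps the input vector to one output component:

    #include <vector>
    #include <functional>

    // central finite-difference derivative of `forward` w.r.t. input component jj
    double fd_derivative(const std::function<double(const std::vector<double>&)>& forward,
                         std::vector<double> x, int jj, double h = 1e-6) {
      x[jj] += h;
      double f_plus = forward(x);
      x[jj] -= 2 * h;
      double f_minus = forward(x);
      return (f_plus - f_minus) / (2 * h);
    }

  Agreement between fd_derivative and the back-propagated values (to roughly the order
  of h^2) would indicate that back_prop() is consistent with
  calculate_output_of_each_layer().
*/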

}  // namespace annfunc
}  // namespace function
}  // namespace PLMD