LCOV - code coverage report
Current view: top level - annfunc - ANN.cpp (source / functions)
Test: plumed test coverage
Date: 2024-10-18 13:59:31
             Hit    Total   Coverage
Lines:       133    137     97.1 %
Functions:   5      6       83.3 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             : MIT License
       3             : 
       4             : Copyright (c) 2019 Wei Chen and Andrew Ferguson
       5             : 
       6             : Permission is hereby granted, free of charge, to any person obtaining a copy
       7             : of this software and associated documentation files (the "Software"), to deal
       8             : in the Software without restriction, including without limitation the rights
       9             : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      10             : copies of the Software, and to permit persons to whom the Software is
      11             : furnished to do so, subject to the following conditions:
      12             : 
      13             : The above copyright notice and this permission notice shall be included in all
      14             : copies or substantial portions of the Software.
      15             : 
      16             : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      17             : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      18             : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      19             : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      20             : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      21             : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      22             : SOFTWARE.
      23             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      24             : #include "function/Function.h"
      25             : #include "core/ActionRegister.h"
      26             : #include <cassert>
      27             : 
      28             : #include <string>
      29             : #include <cmath>
      30             : #include <iostream>
      31             : // #include <stdio.h>
      32             : 
      33             : using namespace std;
      34             : 
      35             : // #define DEBUG
      36             : // #define DEBUG_2
      37             : // #define DEBUG_3
      38             : 
      39             : namespace PLMD {
      40             : namespace function {
      41             : namespace annfunc {
      42             : 
      43             : //+PLUMEDOC ANNMOD_Function ANN
      44             : /*
      45             : Calculates the ANN function.
      46             : 
      47             : This module implements the ANN class, a subclass of the Function class.
      48             : The ANN class takes multi-dimensional arrays as inputs to a fully-connected feedforward neural network
      49             : with specified weights and biases, and generates the corresponding outputs.
      50             : The ANN outputs can be used as collective variables, as inputs to other collective variables,
      51             : or as inputs to data analysis tools.
      52             : 
      53             : \par Examples
      54             : 
      55             : Assume we have an ANN with layer sizes [2, 3, 1]. The weights connecting layers 0 and 1 are
      56             : 
      57             : [[1,2], [3,4], [5,6]]
      58             : 
      59             : and the weights connecting layers 1 and 2 are
      60             : 
      61             : [[7,8,9]]
      62             : 
      63             : The biases for layers 1 and 2 are [10, 11, 12] and [13], respectively.
      64             : 
      65             : All activation functions are Tanh.
      66             : 
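                     : As a quick sanity check of this architecture (hand arithmetic, not part of the module): if both
                     : inputs are zero, the layer-1 pre-activations are just the biases [10, 11, 12], so each hidden output
                     : tanh(10), tanh(11), tanh(12) is approximately 1; the final pre-activation is then approximately
                     : 7 + 8 + 9 + 13 = 37, and the output node is tanh(37), again approximately 1.
                     : 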
      67             : Then, if the input variables are l_0_out_0 and l_0_out_1, the corresponding ANN function object can be
      68             : defined with the following PLUMED script:
      69             : 
      70             : \plumedfile
      71             : ANN ...
      72             : LABEL=ann
      73             : ARG=l_0_out_0,l_0_out_1
      74             : NUM_LAYERS=3
      75             : NUM_NODES=2,3,1
      76             : ACTIVATIONS=Tanh,Tanh
      77             : WEIGHTS0=1,2,3,4,5,6
      78             : WEIGHTS1=7,8,9
      79             : BIASES0=10,11,12
      80             : BIASES1=13
      81             : ... ANN
      82             : \endplumedfile
      83             : 
      84             : To access its components, we use "ann.node-0", "ann.node-1", ..., which represent the components of the neural network outputs.
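                     : 
                     : For example, the single output component of the network defined above could be monitored with a
                     : standard PRINT action (a minimal usage sketch; the file name is arbitrary):
                     : 
                     : \plumedfile
                     : PRINT ARG=ann.node-0 FILE=colvar STRIDE=100
                     : \endplumedfile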
      85             : 
      86             : 
      87             : */
      88             : //+ENDPLUMEDOC
      89             : 
      90             : class ANN : public Function
      91             : {
      92             : private:
      93             :   int num_layers;
      94             :   vector<int> num_nodes;
      95             :   vector<string> activations;   // activation functions
      96             :   vector<vector<double> > weights;  // flattened weight arrays
      97             :   vector<vector<double> > biases;
      98             :   vector<vector<double> > output_of_each_layer;
      99             :   vector<vector<double> > input_of_each_layer;
     100             :   vector<double** > coeff;  // weight matrix arrays, reshaped from "weights"
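                     :   // note: coeff[l][j][k] is the weight from node k of layer l to node j of layer l+1;
                     :   // these raw arrays are allocated in the constructor and never freed, since no destructor is defined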
     101             : 
     102             : public:
     103             :   static void registerKeywords( Keywords& keys );
     104             :   explicit ANN(const ActionOptions&);
     105             :   virtual void calculate();
     106             :   void calculate_output_of_each_layer(const vector<double>& input);
     107             :   void back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component);
     108             : };
     109             : 
     110             : PLUMED_REGISTER_ACTION(ANN,"ANN")
     111             : 
     112           7 : void ANN::registerKeywords( Keywords& keys ) {
     113           7 :   Function::registerKeywords(keys);
     114           7 :   keys.use("PERIODIC");
     115          14 :   keys.add("compulsory", "NUM_LAYERS", "number of layers of the neural network");
     116          14 :   keys.add("compulsory", "NUM_NODES", "numbers of nodes in each layer of the neural network");
     117          14 :   keys.add("compulsory", "ACTIVATIONS", "activation functions for the neural network");
     118          14 :   keys.add("numbered", "WEIGHTS", "flattened weight arrays connecting adjacent layers, "
     119             :            "WEIGHTS0 represents flattened weight array connecting layer 0 and layer 1, "
     120             :            "WEIGHTS1 represents flattened weight array connecting layer 1 and layer 2, ...");
     121          14 :   keys.add("numbered", "BIASES", "bias array for each layer of the neural network, "
     122             :            "BIASES0 represents bias array for layer 1, BIASES1 represents bias array for layer 2, ...");
     123             :   // since v2.2 plumed requires all components be registered
     124          14 :   keys.addOutputComponent("node", "default", "scalar", "components of ANN outputs");
     125           7 : }
     126             : 
     127           5 : ANN::ANN(const ActionOptions&ao):
     128             :   Action(ao),
     129           5 :   Function(ao)
     130             : {
     131           5 :   parse("NUM_LAYERS", num_layers);
     132           5 :   num_nodes = vector<int>(num_layers);
     133          10 :   activations = vector<string>(num_layers - 1);
     134          10 :   output_of_each_layer = vector<vector<double> >(num_layers);
     135          10 :   input_of_each_layer = vector<vector<double> >(num_layers);
     136           5 :   coeff = vector<double** >(num_layers - 1);
     137           5 :   parseVector("NUM_NODES", num_nodes);
     138           5 :   parseVector("ACTIVATIONS", activations);
     139           5 :   log.printf("\nactivations = ");
     140          15 :   for (const auto & ss: activations) {
     141          10 :     log.printf("%s, ", ss.c_str());
     142             :   }
     143           5 :   log.printf("\nnum_nodes = ");
     144          20 :   for (auto ss: num_nodes) {
     145          15 :     log.printf("%d, ", ss);
     146             :   }
     147             :   vector<double> temp_single_coeff, temp_single_bias;
     148          10 :   for (int ii = 0; ; ii ++) {
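                     :     // read the numbered WEIGHTSii / BIASESii keywords until WEIGHTSii is absent;
                     :     // an N-layer network has N-1 weight arrays and N-1 bias arrays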
     149             :     // parse coeff
     150          30 :     if( !parseNumberedVector("WEIGHTS", ii, temp_single_coeff) ) {
      151             :       // WEIGHTSii is absent: all weight arrays have been read, exit the parsing loop
     152             :       break;
     153             :     }
     154          10 :     weights.push_back(temp_single_coeff);
     155          10 :     log.printf("size of temp_single_coeff = %lu\n", temp_single_coeff.size());
     156          10 :     log.printf("size of weights = %lu\n", weights.size());
     157             :     // parse bias
     158          20 :     if( !parseNumberedVector("BIASES", ii, temp_single_bias) ) {
      159           0 :       temp_single_bias=biases[ii-1];  // fall back to the previous bias array if BIASESii is absent (not exercised by the tests)
     160             :     }
     161          10 :     biases.push_back(temp_single_bias);
     162          10 :     log.printf("size of temp_single_bias = %lu\n", temp_single_bias.size());
     163          10 :     log.printf("size of biases = %lu\n", biases.size());
     164             :   }
     165             : 
     166           5 :   if(getNumberOfArguments() != num_nodes[0]) {
      167           0 :     error("number of arguments does not match the number of input nodes NUM_NODES[0]");
     168             :   }
     169             : 
     170           5 :   auto temp_coeff = weights;
     171          15 :   for (int ii = 0; ii < num_layers - 1; ii ++) {
     172             :     int num_of_rows, num_of_cols; // num of rows/cols for the coeff matrix of this connection
     173          10 :     num_of_rows = num_nodes[ii + 1];
     174          10 :     num_of_cols = num_nodes[ii];
     175             :     assert (num_of_rows * num_of_cols == temp_coeff[ii].size()); // check whether the size matches
     176             :     // create a 2d array to hold coefficients
     177          10 :     coeff[ii] = new double*[num_of_rows];
     178          32 :     for (int kk = 0; kk < num_of_rows; kk ++) {
     179          22 :       coeff[ii][kk] = new double[num_of_cols];
     180             :     }
     181          61 :     for (int jj = 0; jj < temp_coeff[ii].size(); jj ++) {
     182          51 :       coeff[ii][jj / num_of_cols][jj % num_of_cols] = temp_coeff[ii][jj];
     183             :     }
     184             :   }
     185             :   // check coeff
     186          15 :   for (int ii = 0; ii < num_layers - 1; ii ++) {
     187          10 :     log.printf("coeff %d = \n", ii);
     188          32 :     for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
     189          73 :       for (int kk = 0; kk < num_nodes[ii]; kk ++) {
     190          51 :         log.printf("%f ", coeff[ii][jj][kk]);
     191             :       }
     192          22 :       log.printf("\n");
     193             :     }
     194             :   }
     195             :   // check bias
     196          15 :   for (int ii = 0; ii < num_layers - 1; ii ++) {
     197          10 :     log.printf("bias %d = \n", ii);
     198          32 :     for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
     199          22 :       log.printf("%f ", biases[ii][jj]);
     200             :     }
     201          10 :     log.printf("\n");
     202             :   }
     203           5 :   log.printf("initialization ended\n");
     204             :   // create components
     205          12 :   for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
     206           7 :     string name_of_this_component = "node-" + to_string(ii);
     207           7 :     addComponentWithDerivatives(name_of_this_component);
     208           7 :     componentIsNotPeriodic(name_of_this_component);
     209             :   }
     210           5 :   checkRead();
     211          10 : }
     212             : 
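                     : // forward pass: for each layer ii >= 1,
                     : //   input_of_each_layer[ii]  = coeff[ii-1] * output_of_each_layer[ii-1] + biases[ii-1]
                     : //   output_of_each_layer[ii] = activation(input_of_each_layer[ii])
                     : // inputs and outputs of every layer are cached for later use in back_prop()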
     213        1638 : void ANN::calculate_output_of_each_layer(const vector<double>& input) {
     214             :   // first layer
     215        1638 :   output_of_each_layer[0] = input;
     216             :   // following layers
     217        4914 :   for(int ii = 1; ii < num_nodes.size(); ii ++) {
     218        3276 :     output_of_each_layer[ii].resize(num_nodes[ii]);
     219        3276 :     input_of_each_layer[ii].resize(num_nodes[ii]);
     220             :     // first calculate input
     221       10374 :     for (int jj = 0; jj < num_nodes[ii]; jj ++) {
     222        7098 :       input_of_each_layer[ii][jj] = biases[ii - 1][jj];  // add bias term
     223       23478 :       for (int kk = 0; kk < num_nodes[ii - 1]; kk ++) {
     224       16380 :         input_of_each_layer[ii][jj] += coeff[ii - 1][jj][kk] * output_of_each_layer[ii - 1][kk];
     225             :       }
     226             :     }
     227             :     // then get output
     228        3276 :     if (activations[ii - 1] == string("Linear")) {
     229        1092 :       for(int jj = 0; jj < num_nodes[ii]; jj ++) {
     230         546 :         output_of_each_layer[ii][jj] = input_of_each_layer[ii][jj];
     231             :       }
     232             :     }
     233        2730 :     else if (activations[ii - 1] == string("Tanh")) {
     234        7644 :       for(int jj = 0; jj < num_nodes[ii]; jj ++) {
     235        5460 :         output_of_each_layer[ii][jj] = tanh(input_of_each_layer[ii][jj]);
     236             :       }
     237             :     }
     238         546 :     else if (activations[ii - 1] == string("Circular")) {
     239             :       assert (num_nodes[ii] % 2 == 0);
     240        1092 :       for(int jj = 0; jj < num_nodes[ii] / 2; jj ++) {
     241         546 :         double radius = sqrt(input_of_each_layer[ii][2 * jj] * input_of_each_layer[ii][2 * jj]
     242         546 :                              +input_of_each_layer[ii][2 * jj + 1] * input_of_each_layer[ii][2 * jj + 1]);
     243         546 :         output_of_each_layer[ii][2 * jj] = input_of_each_layer[ii][2 * jj] / radius;
     244         546 :         output_of_each_layer[ii][2 * jj + 1] = input_of_each_layer[ii][2 * jj + 1] / radius;
     245             : 
     246             :       }
     247             :     }
     248             :     else {
      249             :       printf("activation function type not found!\n\n");
     250           0 :       return;
     251             :     }
     252             :   }
     253             : #ifdef DEBUG_2
     254             :   // print out the result for debugging
     255             :   printf("output_of_each_layer = \n");
     256             :   // for (int ii = num_layers - 1; ii < num_layers; ii ++) {
     257             :   for (int ii = 0; ii < num_layers; ii ++) {
     258             :     printf("layer[%d]: ", ii);
     259             :     if (ii != 0) {
     260             :       cout << activations[ii - 1] << "\t";
     261             :     }
     262             :     else {
     263             :       cout << "input \t" ;
     264             :     }
     265             :     for (int jj = 0; jj < num_nodes[ii]; jj ++) {
     266             :       printf("%lf\t", output_of_each_layer[ii][jj]);
     267             :     }
     268             :     printf("\n");
     269             :   }
     270             :   printf("\n");
     271             : #endif
     272             :   return;
     273             : }
     274             : 
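                     : // back propagation: on return, derivatives_of_each_layer[jj][mm] holds the derivative of output
                     : // component index_of_output_component of the last layer with respect to output mm of layer jj,
                     : // accumulated backwards through the network with the chain rule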
     275        2184 : void ANN::back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component) {
      276        2184 :   derivatives_of_each_layer = output_of_each_layer;  // same shape as the outputs, so a deep copy allocates the right storage
      277             :   // first set the derivatives for the output (last) layer: 1 for the selected component, 0 otherwise
     278        5460 :   for (int ii = 0; ii < num_nodes[num_nodes.size() - 1]; ii ++ ) {
     279        3276 :     if (ii == index_of_output_component) {
     280        2184 :       derivatives_of_each_layer[num_nodes.size() - 1][ii] = 1;
     281             :     }
     282             :     else {
     283        1092 :       derivatives_of_each_layer[num_nodes.size() - 1][ii] = 0;
     284             :     }
     285             :   }
      286             :   // then use back propagation to calculate the derivatives for the previous layers
     287        6552 :   for (int jj = num_nodes.size() - 2; jj >= 0; jj --) {
     288        4368 :     if (activations[jj] == string("Circular")) {
     289             :       vector<double> temp_derivative_of_input_for_this_layer;
     290        1092 :       temp_derivative_of_input_for_this_layer.resize(num_nodes[jj + 1]);
     291             : #ifdef DEBUG
     292             :       assert (num_nodes[jj + 1] % 2 == 0);
     293             : #endif
      294             :       // first convert the derivatives with respect to the outputs of this circular layer into derivatives with respect to its inputs
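                     :       // with r = sqrt(x_p^2 + x_q^2), the outputs are p = x_p / r and q = x_q / r, so
                     :       //   dp/dx_p =  x_q^2 / r^3,    dp/dx_q = -x_p*x_q / r^3
                     :       //   dq/dx_p = -x_p*x_q / r^3,  dq/dx_q =  x_p^2  / r^3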
     295        2184 :       for(int ii = 0; ii < num_nodes[jj + 1] / 2; ii ++) {
     296             :         // printf("size of input_of_each_layer[%d] = %d\n",jj,  input_of_each_layer[jj].size());
     297        1092 :         double x_p = input_of_each_layer[jj + 1][2 * ii];
     298        1092 :         double x_q = input_of_each_layer[jj + 1][2 * ii + 1];
     299        1092 :         double radius = sqrt(x_p * x_p + x_q * x_q);
     300        1092 :         temp_derivative_of_input_for_this_layer[2 * ii] = x_q / (radius * radius * radius)
     301        1092 :             * (x_q * derivatives_of_each_layer[jj + 1][2 * ii]
     302        1092 :                - x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]);
     303        1092 :         temp_derivative_of_input_for_this_layer[2 * ii + 1] = x_p / (radius * radius * radius)
     304        1092 :             * (x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]
     305        1092 :                - x_q * derivatives_of_each_layer[jj + 1][2 * ii]);
     306             :       }
     307             : #ifdef DEBUG
     308             :       for (int mm = 0; mm < num_nodes[jj + 1]; mm ++) {
     309             :         printf("temp_derivative_of_input_for_this_layer[%d] = %lf\n", mm, temp_derivative_of_input_for_this_layer[mm]);
     310             :         printf("derivatives_of_each_layer[%d + 1][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj + 1][mm]);
     311             :       }
     312             : #endif
      313             :       // then calculate the derivative of the output of layer jj from the derivative of the input of layer (jj + 1)
     314        4368 :       for (int mm = 0; mm < num_nodes[jj]; mm ++) {
     315        3276 :         derivatives_of_each_layer[jj][mm] = 0;
     316        9828 :         for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
     317        6552 :           derivatives_of_each_layer[jj][mm] += coeff[jj][kk][mm] \
     318        6552 :                                                * temp_derivative_of_input_for_this_layer[kk];
     319             : #ifdef DEBUG
     320             :           printf("derivatives_of_each_layer[%d][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj][mm]);
     321             :           printf("coeff[%d][%d][%d] = %lf\n", jj, kk, mm, coeff[jj][kk][mm]);
     322             : #endif
     323             :         }
     324             :       }
      325             :       // TODO: should be fine, passes all tests, although there seemed to be some problems here previously
     326             :     }
     327             :     else {
     328       10920 :       for (int mm = 0; mm < num_nodes[jj]; mm ++) {
     329        7644 :         derivatives_of_each_layer[jj][mm] = 0;
     330       24024 :         for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
     331       16380 :           if (activations[jj] == string("Tanh")) {
      332             :             // d tanh(u)/du = 1 - tanh(u)^2
     333       14742 :             derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk] \
     334       14742 :                                                  * coeff[jj][kk][mm] \
     335       14742 :                                                  * (1 - output_of_each_layer[jj + 1][kk] * output_of_each_layer[jj + 1][kk]);
     336             :           }
     337        1638 :           else if (activations[jj] == string("Linear")) {
      338             :             // linear (identity) activation: derivative is 1
     339        1638 :             derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk] \
     340        1638 :                                                  * coeff[jj][kk][mm] \
     341        1638 :                                                  * 1;
     342             :           }
     343             :           else {
      344             :             printf("activation function type not found!\n\n");
     345           0 :             return;
     346             :           }
     347             :         }
     348             :       }
     349             :     }
     350             :   }
     351             : #ifdef DEBUG
     352             :   // print out the result for debugging
     353             :   printf("derivatives_of_each_layer = \n");
     354             :   for (int ii = 0; ii < num_layers; ii ++) {
     355             :     printf("layer[%d]: ", ii);
     356             :     for (int jj = 0; jj < num_nodes[ii]; jj ++) {
     357             :       printf("%lf\t", derivatives_of_each_layer[ii][jj]);
     358             :     }
     359             :     printf("\n");
     360             :   }
     361             :   printf("\n");
     362             : #endif
     363             :   return;
     364             : }
     365             : 
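                     : // calculate(): run the forward pass on the current arguments, then back-propagate once per output
                     : // component to fill in its value and its derivatives with respect to the inputs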
     366        1638 : void ANN::calculate() {
     367             : 
     368        1638 :   vector<double> input_layer_data(num_nodes[0]);
     369        4914 :   for (int ii = 0; ii < num_nodes[0]; ii ++) {
     370        3276 :     input_layer_data[ii] = getArgument(ii);
     371             :   }
     372             : 
     373        1638 :   calculate_output_of_each_layer(input_layer_data);
     374             :   vector<vector<double> > derivatives_of_each_layer;
     375             : 
     376        3822 :   for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
     377        2184 :     back_prop(derivatives_of_each_layer, ii);
     378        2184 :     string name_of_this_component = "node-" + to_string(ii);
     379        2184 :     Value* value_new=getPntrToComponent(name_of_this_component);
     380        2184 :     value_new -> set(output_of_each_layer[num_layers - 1][ii]);
     381        6552 :     for (int jj = 0; jj < num_nodes[0]; jj ++) {
     382        4368 :       value_new -> setDerivative(jj, derivatives_of_each_layer[0][jj]);  // TODO: setDerivative or addDerivative?
     383             :     }
     384             : #ifdef DEBUG_3
     385             :     printf("derivatives = ");
     386             :     for (int jj = 0; jj < num_nodes[0]; jj ++) {
     387             :       printf("%f ", value_new -> getDerivative(jj));
     388             :     }
     389             :     printf("\n");
     390             : #endif
     391             :   }
     392             : 
     393        3276 : }
     394             : 
     395             : }
     396             : }
     397             : }

Generated by: LCOV version 1.16