/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
MIT License

Copyright (c) 2019 Wei Chen and Andrew Ferguson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
#include "function/Function.h"
#include "core/ActionRegister.h"

#include <cassert>
#include <cstdio>   // for printf used in the debug branches below
#include <string>
#include <cmath>
#include <iostream>

using namespace std;

// #define DEBUG
// #define DEBUG_2
// #define DEBUG_3

namespace PLMD {
namespace function {
namespace annfunc {

//+PLUMEDOC ANNMOD_Function ANN
/*
Calculates the ANN function.

This module implements the ANN class, a subclass of the Function class.
The ANN class takes multi-dimensional arrays as inputs to a fully-connected feedforward neural network
with specified weights and generates the corresponding outputs.
The ANN outputs can be used as collective variables, as inputs for other collective variables,
or as inputs for data analysis tools.

\par Examples

Assume we have an ANN with numbers of nodes [2, 3, 1], where the weights connecting layers 0 and 1 are

[[1,2], [3,4], [5,6]]

and the weights connecting layers 1 and 2 are

[[7,8,9]]

The biases for layers 1 and 2 are [10, 11, 12] and [13], respectively.

All activation functions are Tanh.

If the input variables are l_0_out_0 and l_0_out_1, the corresponding ANN function object can be defined with
the following PLUMED script:

\plumedfile
ANN ...
LABEL=ann
ARG=l_0_out_0,l_0_out_1
NUM_LAYERS=3
NUM_NODES=2,3,1
ACTIVATIONS=Tanh,Tanh
WEIGHTS0=1,2,3,4,5,6
WEIGHTS1=7,8,9
BIASES0=10,11,12
BIASES1=13
... ANN
\endplumedfile

To access its components, we use "ann.node-0", "ann.node-1", ..., which represent the components of the neural network outputs.
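
As a concrete check of these conventions (the WEIGHTS arrays are flattened row by row), the single output of this
example network evaluates to

\f[
\textrm{node-0} = \tanh(7 h_0 + 8 h_1 + 9 h_2 + 13),
\f]

where \f$h_0 = \tanh(l_{0,out,0} + 2\, l_{0,out,1} + 10)\f$, \f$h_1 = \tanh(3\, l_{0,out,0} + 4\, l_{0,out,1} + 11)\f$,
and \f$h_2 = \tanh(5\, l_{0,out,0} + 6\, l_{0,out,1} + 12)\f$.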
      85             : 
      86             : 
      87             : */
      88             : //+ENDPLUMEDOC

class ANN : public Function {
private:
  int num_layers;
  vector<int> num_nodes;
  vector<string> activations;   // activation functions
  vector<vector<double> > weights;  // flattened weight arrays
  vector<vector<double> > biases;
  vector<vector<double> > output_of_each_layer;
  vector<vector<double> > input_of_each_layer;
  vector<double** > coeff;  // weight matrix arrays, reshaped from "weights"

public:
  static void registerKeywords( Keywords& keys );
  explicit ANN(const ActionOptions&);
  virtual void calculate();
  void calculate_output_of_each_layer(const vector<double>& input);
  void back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component);
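  // NOTE: a hedged addition, not present in the original file: the constructor
  // allocates the "coeff" matrices with new[] and never frees them, so we
  // sketch a destructor that releases them (skipping entries left null if the
  // constructor exits early).
  virtual ~ANN() {
    for (int ii = 0; ii < (int) coeff.size(); ii ++) {
      if (coeff[ii] == NULL) continue;
      for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
        delete[] coeff[ii][jj];
      }
      delete[] coeff[ii];
    }
  }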
};

PLUMED_REGISTER_ACTION(ANN,"ANN")

void ANN::registerKeywords( Keywords& keys ) {
  Function::registerKeywords(keys);
  keys.use("PERIODIC");
  keys.add("compulsory", "NUM_LAYERS", "number of layers of the neural network");
  keys.add("compulsory", "NUM_NODES", "numbers of nodes in each layer of the neural network");
  keys.add("compulsory", "ACTIVATIONS", "activation functions for the neural network");
  keys.add("numbered", "WEIGHTS", "flattened weight arrays connecting adjacent layers, "
           "WEIGHTS0 represents the flattened weight array connecting layer 0 and layer 1, "
           "WEIGHTS1 represents the flattened weight array connecting layer 1 and layer 2, ...");
  keys.add("numbered", "BIASES", "bias array for each layer of the neural network, "
           "BIASES0 represents the bias array for layer 1, BIASES1 represents the bias array for layer 2, ...");
  // since v2.2 plumed requires all components to be registered
  keys.addOutputComponent("node", "default", "scalar", "components of ANN outputs");
}

ANN::ANN(const ActionOptions&ao):
  Action(ao),
  Function(ao) {
  parse("NUM_LAYERS", num_layers);
  num_nodes = vector<int>(num_layers);
  activations = vector<string>(num_layers - 1);
  output_of_each_layer = vector<vector<double> >(num_layers);
  input_of_each_layer = vector<vector<double> >(num_layers);
  coeff = vector<double** >(num_layers - 1);
  parseVector("NUM_NODES", num_nodes);
  parseVector("ACTIVATIONS", activations);
  log.printf("\nactivations = ");
  for (const auto & ss: activations) {
    log.printf("%s, ", ss.c_str());
  }
  log.printf("\nnum_nodes = ");
  for (auto ss: num_nodes) {
    log.printf("%d, ", ss);
  }
  vector<double> temp_single_coeff, temp_single_bias;
  for (int ii = 0; ; ii ++) {
    // parse coeff; stop at the first missing WEIGHTS keyword
    if( !parseNumberedVector("WEIGHTS", ii, temp_single_coeff) ) {
      break;
    }
    weights.push_back(temp_single_coeff);
    log.printf("size of temp_single_coeff = %lu\n", temp_single_coeff.size());
    log.printf("size of weights = %lu\n", weights.size());
    // parse bias; if BIASESii is not given, fall back to the previous bias array
    if( !parseNumberedVector("BIASES", ii, temp_single_bias) ) {
      temp_single_bias=biases[ii-1];
    }
    biases.push_back(temp_single_bias);
    log.printf("size of temp_single_bias = %lu\n", temp_single_bias.size());
    log.printf("size of biases = %lu\n", biases.size());
  }

  if(getNumberOfArguments() != num_nodes[0]) {
    error("Number of arguments does not match the number of input nodes");
  }

  auto temp_coeff = weights;
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    int num_of_rows, num_of_cols; // number of rows/cols of the coeff matrix for this connection
    num_of_rows = num_nodes[ii + 1];
    num_of_cols = num_nodes[ii];
    assert (num_of_rows * num_of_cols == temp_coeff[ii].size()); // check whether the size matches
    // create a 2d array to hold coefficients; the flattened weights are stored
    // in row-major order, so element jj maps to row jj / num_of_cols and
    // column jj % num_of_cols
    coeff[ii] = new double*[num_of_rows];
    for (int kk = 0; kk < num_of_rows; kk ++) {
      coeff[ii][kk] = new double[num_of_cols];
    }
    for (int jj = 0; jj < temp_coeff[ii].size(); jj ++) {
      coeff[ii][jj / num_of_cols][jj % num_of_cols] = temp_coeff[ii][jj];
    }
  }
  // log the coeff matrices for checking
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    log.printf("coeff %d = \n", ii);
    for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
      for (int kk = 0; kk < num_nodes[ii]; kk ++) {
        log.printf("%f ", coeff[ii][jj][kk]);
      }
      log.printf("\n");
    }
  }
  // log the bias arrays for checking
  for (int ii = 0; ii < num_layers - 1; ii ++) {
    log.printf("bias %d = \n", ii);
    for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
      log.printf("%f ", biases[ii][jj]);
    }
    log.printf("\n");
  }
  log.printf("initialization ended\n");
  // create one output component per node in the last layer
  for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
    string name_of_this_component = "node-" + to_string(ii);
    addComponentWithDerivatives(name_of_this_component);
    componentIsNotPeriodic(name_of_this_component);
  }
  checkRead();
}

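// A sketch of the forward pass implemented below: for each layer ii > 0 the
// pre-activation input is
//   input_of_each_layer[ii][jj] = biases[ii-1][jj] + sum_kk coeff[ii-1][jj][kk] * output_of_each_layer[ii-1][kk]
// and output_of_each_layer[ii] is that input passed through the layer's
// activation function (Linear, Tanh, or Circular).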
void ANN::calculate_output_of_each_layer(const vector<double>& input) {
  // first layer
  output_of_each_layer[0] = input;
  // following layers
  for(int ii = 1; ii < num_nodes.size(); ii ++) {
    output_of_each_layer[ii].resize(num_nodes[ii]);
    input_of_each_layer[ii].resize(num_nodes[ii]);
    // first calculate input
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      input_of_each_layer[ii][jj] = biases[ii - 1][jj];  // add bias term
      for (int kk = 0; kk < num_nodes[ii - 1]; kk ++) {
        input_of_each_layer[ii][jj] += coeff[ii - 1][jj][kk] * output_of_each_layer[ii - 1][kk];
      }
    }
    // then get output
    if (activations[ii - 1] == string("Linear")) {
      for(int jj = 0; jj < num_nodes[ii]; jj ++) {
        output_of_each_layer[ii][jj] = input_of_each_layer[ii][jj];
      }
    } else if (activations[ii - 1] == string("Tanh")) {
      for(int jj = 0; jj < num_nodes[ii]; jj ++) {
        output_of_each_layer[ii][jj] = tanh(input_of_each_layer[ii][jj]);
      }
    } else if (activations[ii - 1] == string("Circular")) {
      // a Circular layer projects each pair of nodes onto the unit circle
      assert (num_nodes[ii] % 2 == 0);
      for(int jj = 0; jj < num_nodes[ii] / 2; jj ++) {
        double radius = sqrt(input_of_each_layer[ii][2 * jj] * input_of_each_layer[ii][2 * jj]
                             +input_of_each_layer[ii][2 * jj + 1] * input_of_each_layer[ii][2 * jj + 1]);
        output_of_each_layer[ii][2 * jj] = input_of_each_layer[ii][2 * jj] / radius;
        output_of_each_layer[ii][2 * jj + 1] = input_of_each_layer[ii][2 * jj + 1] / radius;
      }
    } else {
      printf("layer type not found!\n\n");
      return;
    }
  }
#ifdef DEBUG_2
  // print out the result for debugging
  printf("output_of_each_layer = \n");
  for (int ii = 0; ii < num_layers; ii ++) {
    printf("layer[%d]: ", ii);
    if (ii != 0) {
      cout << activations[ii - 1] << "\t";
    } else {
      cout << "input \t" ;
    }
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      printf("%lf\t", output_of_each_layer[ii][jj]);
    }
    printf("\n");
  }
  printf("\n");
#endif
  return;
}

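// A sketch of the backward pass implemented below: the derivative of component
// "node-i" with respect to the output layer is a one-hot vector at
// index_of_output_component, and each step down applies the chain rule
//   d/d(out[jj][mm]) = sum_kk d/d(out[jj+1][kk]) * f'(input[jj+1][kk]) * coeff[jj][kk][mm]
// where f' is the derivative of the activation function of layer jj + 1.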
void ANN::back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component) {
  derivatives_of_each_layer = output_of_each_layer;  // the data structure and size should be the same, so I simply deep copy it
  // first calculate derivatives for the output layer
  for (int ii = 0; ii < num_nodes[num_nodes.size() - 1]; ii ++ ) {
    if (ii == index_of_output_component) {
      derivatives_of_each_layer[num_nodes.size() - 1][ii] = 1;
    } else {
      derivatives_of_each_layer[num_nodes.size() - 1][ii] = 0;
    }
  }
  // then use back propagation to calculate derivatives for the previous layers
  for (int jj = num_nodes.size() - 2; jj >= 0; jj --) {
    if (activations[jj] == string("Circular")) {
      vector<double> temp_derivative_of_input_for_this_layer;
      temp_derivative_of_input_for_this_layer.resize(num_nodes[jj + 1]);
#ifdef DEBUG
      assert (num_nodes[jj + 1] % 2 == 0);
#endif
      // first calculate the derivative of the input from the derivative of the
      // output of this circular layer: with out_p = x_p / r and
      // r = sqrt(x_p^2 + x_q^2), the partials are
      //   d(out_p)/d(x_p) = x_q^2 / r^3 and d(out_p)/d(x_q) = -x_p * x_q / r^3,
      // which combine into the expressions below
      for(int ii = 0; ii < num_nodes[jj + 1] / 2; ii ++) {
        double x_p = input_of_each_layer[jj + 1][2 * ii];
        double x_q = input_of_each_layer[jj + 1][2 * ii + 1];
        double radius = sqrt(x_p * x_p + x_q * x_q);
        temp_derivative_of_input_for_this_layer[2 * ii] = x_q / (radius * radius * radius)
            * (x_q * derivatives_of_each_layer[jj + 1][2 * ii]
               - x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]);
        temp_derivative_of_input_for_this_layer[2 * ii + 1] = x_p / (radius * radius * radius)
            * (x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]
               - x_q * derivatives_of_each_layer[jj + 1][2 * ii]);
      }
#ifdef DEBUG
      for (int mm = 0; mm < num_nodes[jj + 1]; mm ++) {
        printf("temp_derivative_of_input_for_this_layer[%d] = %lf\n", mm, temp_derivative_of_input_for_this_layer[mm]);
        printf("derivatives_of_each_layer[%d + 1][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj + 1][mm]);
      }
#endif
      // then calculate the derivative of the output of layer jj from the derivative of the input of layer (jj + 1)
      for (int mm = 0; mm < num_nodes[jj]; mm ++) {
        derivatives_of_each_layer[jj][mm] = 0;
        for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
          derivatives_of_each_layer[jj][mm] += coeff[jj][kk][mm] \
                                               * temp_derivative_of_input_for_this_layer[kk];
#ifdef DEBUG
          printf("derivatives_of_each_layer[%d][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj][mm]);
          printf("coeff[%d][%d][%d] = %lf\n", jj, kk, mm, coeff[jj][kk][mm]);
#endif
        }
      }
      // TODO: should be fine, passes all tests, although there seemed to be some problems here previously
    } else {
      for (int mm = 0; mm < num_nodes[jj]; mm ++) {
        derivatives_of_each_layer[jj][mm] = 0;
        for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
          if (activations[jj] == string("Tanh")) {
            // d tanh(x)/dx = 1 - tanh(x)^2
            derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk] \
                                                 * coeff[jj][kk][mm] \
                                                 * (1 - output_of_each_layer[jj + 1][kk] * output_of_each_layer[jj + 1][kk]);
          } else if (activations[jj] == string("Linear")) {
            // the derivative of a linear unit is 1
            derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk] \
                                                 * coeff[jj][kk][mm] \
                                                 * 1;
          } else {
            printf("layer type not found!\n\n");
            return;
          }
        }
      }
    }
  }
#ifdef DEBUG
  // print out the result for debugging
  printf("derivatives_of_each_layer = \n");
  for (int ii = 0; ii < num_layers; ii ++) {
    printf("layer[%d]: ", ii);
    for (int jj = 0; jj < num_nodes[ii]; jj ++) {
      printf("%lf\t", derivatives_of_each_layer[ii][jj]);
    }
    printf("\n");
  }
  printf("\n");
#endif
  return;
}

void ANN::calculate() {

  vector<double> input_layer_data(num_nodes[0]);
  for (int ii = 0; ii < num_nodes[0]; ii ++) {
    input_layer_data[ii] = getArgument(ii);
  }

  calculate_output_of_each_layer(input_layer_data);
  vector<vector<double> > derivatives_of_each_layer;

  for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
    back_prop(derivatives_of_each_layer, ii);
    string name_of_this_component = "node-" + to_string(ii);
    Value* value_new=getPntrToComponent(name_of_this_component);
    value_new -> set(output_of_each_layer[num_layers - 1][ii]);
    for (int jj = 0; jj < num_nodes[0]; jj ++) {
      value_new -> setDerivative(jj, derivatives_of_each_layer[0][jj]);  // TODO: setDerivative or addDerivative?
    }
#ifdef DEBUG_3
    printf("derivatives = ");
    for (int jj = 0; jj < num_nodes[0]; jj ++) {
      printf("%f ", value_new -> getDerivative(jj));
    }
    printf("\n");
#endif
  }

}

} // namespace annfunc
} // namespace function
} // namespace PLMD
