LCOV - code coverage report
Current view: top level - ves - Opt_BachAveragedSGD.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 68 71 95.8 %
Date: 2020-11-18 11:20:57 Functions: 10 12 83.3 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2016-2018 The VES code team
       3             :    (see the PEOPLE-VES file at the root of this folder for a list of names)
       4             : 
       5             :    See http://www.ves-code.org for more information.
       6             : 
       7             :    This file is part of VES code module.
       8             : 
       9             :    The VES code module is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    The VES code module is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with the VES code module.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : 
      23             : #include "Optimizer.h"
      24             : #include "CoeffsVector.h"
      25             : #include "CoeffsMatrix.h"
      26             : 
      27             : #include "core/ActionRegister.h"
      28             : #include "core/PlumedMain.h"
      29             : 
      30             : 
      31             : 
      32             : namespace PLMD {
      33             : namespace ves {
      34             : 
      35             : //+PLUMEDOC VES_OPTIMIZER OPT_AVERAGED_SGD
      36             : /*
       37             : Averaged stochastic gradient descent with fixed step size.
      38             : 
       39             : \par Algorithm
       40             : 
       41             : This optimizer updates the coefficients according to the averaged stochastic gradient descent algorithm described in ref \cite Bach-NIPS-2013. This algorithm considers two sets of coefficients, the so-called instantaneous coefficients that are updated according to the recursion formula given by
      42             : \f[
      43             : \boldsymbol{\alpha}^{(n+1)} = \boldsymbol{\alpha}^{(n)} -
      44             : \mu \left[
      45             : \nabla \Omega(\bar{\boldsymbol{\alpha}}^{(n)}) +
      46             : \mathbf{H}(\bar{\boldsymbol{\alpha}}^{(n)})
      47             : [\boldsymbol{\alpha}^{(n)}-\bar{\boldsymbol{\alpha}}^{(n)}]
      48             : \right],
      49             : \f]
      50             : where \f$\mu\f$ is a fixed step size and the gradient \f$ \nabla\Omega(\bar{\boldsymbol{\alpha}}^{(n)})\f$ and the Hessian \f$\mathbf{H}(\bar{\boldsymbol{\alpha}}^{(n)})\f$ depend on the averaged coefficients defined as
      51             : \f[
      52             : \bar{\boldsymbol{\alpha}}^{(n)} = \frac{1}{n+1} \sum_{k=0}^{n} \boldsymbol{\alpha}^{(k)}.
      53             : \f]
      54             : This means that the bias acting on the system depends on the averaged coefficients \f$\bar{\boldsymbol{\alpha}}^{(n)}\f$ which leads to a smooth convergence of the bias and the estimated free energy surface. Furthermore, this allows for a rather short sampling time for each iteration, for classical MD simulations typical sampling times are on the order of few ps (around 1000-4000 MD steps).
      55             : 
      56             : Currently it is only supported to employ the diagonal part of the Hessian which is generally sufficient. Support for employing the full Hessian will be added later on.
      57             : 
      58             : The VES bias that is to be optimized should be specified using the
      59             : BIAS keyword.
      60             : The fixed step size \f$\mu\f$ is given using the STEPSIZE keyword.
      61             : The frequency of updating the coefficients is given using the
      62             : STRIDE keyword where the value is given in the number of MD steps.
      63             : For example, if the MD time step is 0.02 ps and STRIDE=2000 will the
      64             : coefficients be updated every 4 ps.
      65             : The coefficients will be outputted to the file given by the
      66             : COEFFS_FILE keyword. How often the coefficients are written
      67             : to this file is controlled by the COEFFS_OUTPUT keyword.
      68             : 
       69             : If the VES bias employs a dynamic target distribution that needs to be
       70             : iteratively updated (e.g. \ref TD_WELLTEMPERED) \cite Valsson-JCTC-2015, you will need to specify
       71             : the stride for updating the target distribution by using
       72             : the TARGETDIST_STRIDE keyword where the stride
       73             : is given in terms of coefficient iterations. For example if the
      74             : MD time step is 0.02 ps and STRIDE=1000, such that the coefficients
      75             : are updated every 2 ps, will TARGETDIST_STRIDE=500 mean that the
      76             : target distribution will be updated every 1000 ps.
      77             : 
      78             : The output of FESs and biases is controlled by the FES_OUTPUT and the BIAS_OUTPUT
      79             : keywords. It is also possible to output one-dimensional projections of the FESs
      80             : by using the FES_PROJ_OUTPUT keyword but for that to work you will need to select
      81             : for which argument to do the projections by using the numbered PROJ_ARG keyword in
      82             : the VES bias that is optimized.
      83             : You can also output dynamic target distributions by using the
      84             : TARGETDIST_OUTPUT and TARGETDIST_PROJ_OUTPUT keywords.
      85             : 
      86             : It is possible to start the optimization from some initial set of
      87             : coefficients that have been previously obtained by using the INITIAL_COEFFS
      88             : keyword.
      89             : 
       90             : When restarting simulations it should be sufficient to put the \ref RESTART action
       91             : in the beginning of the input files (for some MD codes the PLUMED should automatically
       92             : detect if it is a restart run) and keep the same input as before. The restarting of
      93             : the optimization should be automatic as the optimizer will then read in the
      94             : coefficients from the file given in COEFFS_FILE. For dynamic target
      95             : distribution the code will also read in the final target distribution from the
      96             : previous run (which is always outputted even if the TARGETDIST_OUTPUT keyword
      97             : is not used).
      98             : 
      99             : This optimizer supports the usage of multiple walkers where different copies of the system share the same bias potential (i.e. coefficients) and cooperatively sample the averages needed for the gradient and Hessian. This can significantly help with convergence in difficult cases. It is of course best to start the different copies from different positions in CV space. To activate this option you just need to add the MULTIPLE_WALKERS flag. Note that this is only supported if the MD code support running multiple replicas connected via MPI.
     100             : 
      101             : The optimizer supports the usage of a so-called mask file that can be used to employ different step sizes for different coefficients and/or deactivate the optimization of certain coefficients (by putting values of 0.0). The mask file is read in by using the MASK_FILE keyword and should be in the same format as the coefficient file. It is possible to generate a template mask file by using the OUTPUT_MASK_FILE keyword.
     102             : 
     103             : \par Examples
     104             : 
      105             : In the following input we employ an averaged stochastic gradient descent with a
      106             : fixed step size of 1.0 and update the coefficients every 1000 MD steps
      107             : (e.g. every 2 ps if the MD time step is 0.02 ps). The coefficients are outputted
     108             : to the coeffs.data every 50 iterations while the FES and bias is outputted
     109             : to files every 500 iterations (e.g. every 1000 ps).
     110             : \plumedfile
     111             : phi:   TORSION ATOMS=5,7,9,15
     112             : 
     113             : bf1: BF_FOURIER ORDER=5 MINIMUM=-pi MAXIMUM=pi
     114             : 
     115             : VES_LINEAR_EXPANSION ...
     116             :  ARG=phi
     117             :  BASIS_FUNCTIONS=bf1
     118             :  LABEL=ves1
     119             :  TEMP=300.0
     120             :  GRID_BINS=100
     121             : ... VES_LINEAR_EXPANSION
     122             : 
     123             : OPT_AVERAGED_SGD ...
     124             :   BIAS=ves1
     125             :   STRIDE=1000
     126             :   LABEL=o1
     127             :   STEPSIZE=1.0
     128             :   COEFFS_FILE=coeffs.data
     129             :   COEFFS_OUTPUT=50
     130             :   FES_OUTPUT=500
     131             :   BIAS_OUTPUT=500
     132             : ... OPT_AVERAGED_SGD
     133             : \endplumedfile
     134             : 
     135             : 
     136             : In the following example we employ a well-tempered target distribution that
     137             : is updated every 500 iterations (e.g. every 1000 ps). The target distribution is
     138             : also output to a file every 2000 iterations (the TARGETDIST_OUTPUT keyword).
     139             : Here we also employ MULTIPLE_WALKERS flag to enable the usage of
     140             : multiple walkers.
     141             : \plumedfile
     142             : phi:   TORSION ATOMS=5,7,9,15
     143             : psi:   TORSION ATOMS=7,9,15,17
     144             : 
     145             : bf1: BF_FOURIER ORDER=5 MINIMUM=-pi MAXIMUM=pi
     146             : bf2: BF_FOURIER ORDER=4 MINIMUM=-pi MAXIMUM=pi
     147             : 
     148             : td1: TD_WELLTEMPERED BIASFACTOR=10
     149             : 
     150             : VES_LINEAR_EXPANSION ...
     151             :  ARG=phi,psi
     152             :  BASIS_FUNCTIONS=bf1,bf2
     153             :  LABEL=ves1
     154             :  TEMP=300.0
     155             :  GRID_BINS=100,100
     156             :  TARGET_DISTRIBUTION=td1
     157             :  PROJ_ARG1=phi
     158             :  PROJ_ARG2=psi
     159             : ... VES_LINEAR_EXPANSION
     160             : 
     161             : OPT_AVERAGED_SGD ...
     162             :   BIAS=ves1
     163             :   STRIDE=1000
     164             :   LABEL=o1
     165             :   STEPSIZE=1.0
     166             :   MULTIPLE_WALKERS
     167             :   COEFFS_FILE=coeffs.data
     168             :   COEFFS_OUTPUT=50
     169             :   FES_OUTPUT=500
     170             :   FES_PROJ_OUTPUT=500
     171             :   BIAS_OUTPUT=500
     172             :   TARGETDIST_STRIDE=500
     173             :   TARGETDIST_OUTPUT=2000
     174             : ... OPT_AVERAGED_SGD
     175             : \endplumedfile
     176             : 
     177             : 
     178             : 
     179             : */
     180             : //+ENDPLUMEDOC
     181             : 
/**
Averaged stochastic gradient descent optimizer (Bach and Moulines, NIPS 2013).

Maintains two coefficient sets via the Optimizer base class: the
instantaneous ("aux") coefficients updated by the SGD recursion and the
averaged coefficients used for the bias. Can optionally write the combined
gradient (gradient + Hessian term) to file for monitoring.
*/
class Opt_BachAveragedSGD : public Optimizer {
private:
  // Owned copies of the gradient coefficient vectors, used to hold the
  // combined gradient (gradient + Hessian term); deleted in the destructor.
  std::vector<CoeffsVector*> combinedgradient_pntrs_;
  // Output stride for the combined gradient, in bias iterations (default 100).
  unsigned int combinedgradient_wstride_;
  // Owned output files for the combined gradient; closed and deleted in the destructor.
  std::vector<OFile*> combinedgradientOFiles_;
  // Decay constant tau (in iterations) for exponentially decaying averaging;
  // 0.0 means plain 1/(n+1) running-average weights (see getAverDecay()).
  double decaying_aver_tau_;
private:
  // Access the combined-gradient coefficients for coefficient set c_id.
  CoeffsVector& CombinedGradient(const unsigned int c_id) const {return *combinedgradient_pntrs_[c_id];}
  // Averaging weight used in coeffsUpdate().
  double getAverDecay() const;
public:
  static void registerKeywords(Keywords&);
  explicit Opt_BachAveragedSGD(const ActionOptions&);
  ~Opt_BachAveragedSGD();
  // Perform one optimization step for coefficient set c_id.
  void coeffsUpdate(const unsigned int c_id = 0);
};
     197             : 
     198             : 
// Register this optimizer with the PLUMED action registry under the
// input-file directive OPT_AVERAGED_SGD.
PLUMED_REGISTER_ACTION(Opt_BachAveragedSGD,"OPT_AVERAGED_SGD")
     200             : 
     201             : 
// Register the input keywords for OPT_AVERAGED_SGD.
// NOTE: keyword registration order determines the order in the manual, so
// the call sequence below is deliberate.
void Opt_BachAveragedSGD::registerKeywords(Keywords& keys) {
  // Base optimizer keywords plus the optional feature sets this optimizer supports.
  Optimizer::registerKeywords(keys);
  Optimizer::useFixedStepSizeKeywords(keys);
  Optimizer::useMultipleWalkersKeywords(keys);
  Optimizer::useHessianKeywords(keys);
  Optimizer::useMaskKeywords(keys);
  Optimizer::useRestartKeywords(keys);
  Optimizer::useMonitorAverageGradientKeywords(keys);
  Optimizer::useDynamicTargetDistributionKeywords(keys);
  // Hidden keywords: combined-gradient output is mainly for debugging/regtests.
  keys.add("hidden","COMBINED_GRADIENT_FILE","the name of output file for the combined gradient (gradient + Hessian term)");
  keys.add("hidden","COMBINED_GRADIENT_OUTPUT","how often the combined gradient should be written to file. This parameter is given as the number of bias iterations. It is by default 100 if COMBINED_GRADIENT_FILE is specficed");
  keys.add("hidden","COMBINED_GRADIENT_FMT","specify format for combined gradient file(s) (useful for decrease the number of digits in regtests)");
  keys.add("optional","EXP_DECAYING_AVER","calculate the averaged coefficients using exponentially decaying averaging using the decaying constant given here in the number of iterations");
}
     216             : 
     217             : 
     218         210 : Opt_BachAveragedSGD::~Opt_BachAveragedSGD() {
     219         158 :   for(unsigned int i=0; i<combinedgradient_pntrs_.size(); i++) {
     220           6 :     delete combinedgradient_pntrs_[i];
     221             :   }
     222         158 :   for(unsigned int i=0; i<combinedgradientOFiles_.size(); i++) {
     223           6 :     combinedgradientOFiles_[i]->close();
     224           6 :     delete combinedgradientOFiles_[i];
     225             :   }
     226         140 : }
     227             : 
     228             : 
     229          70 : Opt_BachAveragedSGD::Opt_BachAveragedSGD(const ActionOptions&ao):
     230             :   PLUMED_VES_OPTIMIZER_INIT(ao),
     231             :   combinedgradient_pntrs_(0),
     232             :   combinedgradient_wstride_(100),
     233             :   combinedgradientOFiles_(0),
     234          70 :   decaying_aver_tau_(0.0)
     235             : {
     236          70 :   log.printf("  Averaged stochastic gradient decent, see and cite ");
     237         210 :   log << plumed.cite("Bach and Moulines, NIPS 26, 773-781 (2013)");
     238          70 :   log.printf("\n");
     239          70 :   unsigned int decaying_aver_tau_int=0;
     240         140 :   parse("EXP_DECAYING_AVER",decaying_aver_tau_int);
     241          70 :   if(decaying_aver_tau_int>0) {
     242           2 :     decaying_aver_tau_ = static_cast<double>(decaying_aver_tau_int);
     243           2 :     log.printf("  Coefficients calculated using an exponentially decaying average with a decaying constant of %u iterations, see and cite ",decaying_aver_tau_int);
     244           6 :     log << plumed.cite("Invernizzi, Valsson, and Parrinello, Proc. Natl. Acad. Sci. USA 114, 3370-3374 (2017)");
     245           2 :     log.printf("\n");
     246             :   }
     247             :   //
     248          70 :   std::vector<std::string> combinedgradient_fnames;
     249         140 :   parseFilenames("COMBINED_GRADIENT_FILE",combinedgradient_fnames);
     250         140 :   parse("COMBINED_GRADIENT_OUTPUT",combinedgradient_wstride_);
     251          70 :   setupOFiles(combinedgradient_fnames,combinedgradientOFiles_,useMultipleWalkers());
     252          70 :   std::string combinedgradient_fmt="";
     253         140 :   parse("COMBINED_GRADIENT_FMT",combinedgradient_fmt);
     254          70 :   if(combinedgradient_fnames.size()>0) {
     255          18 :     for(unsigned int i=0; i<numberOfCoeffsSets(); i++) {
     256          18 :       CoeffsVector* combinedgradient_tmp = new CoeffsVector(*getGradientPntrs()[i]);
     257          12 :       std::string label = getGradientPntrs()[i]->getLabel();
     258           6 :       if(label.find("gradient")!=std::string::npos) {
     259          18 :         label.replace(label.find("gradient"), std::string("gradient").length(), "combined_gradient");
     260             :       }
     261             :       else {
     262             :         label += "_combined";
     263             :       }
     264           6 :       combinedgradient_tmp->setLabels(label);
     265           6 :       if(combinedgradient_fmt.size()>0) {
     266           6 :         combinedgradient_tmp->setOutputFmt(combinedgradient_fmt);
     267             :       }
     268           6 :       combinedgradient_pntrs_.push_back(combinedgradient_tmp);
     269             :     }
     270             :     //
     271           6 :     if(numberOfCoeffsSets()==1) {
     272          24 :       log.printf("  Combined gradient (gradient + Hessian term) will be written out to file %s every %u iterations\n",combinedgradientOFiles_[0]->getPath().c_str(),combinedgradient_wstride_);
     273             :     }
     274             :     else {
     275           0 :       log.printf("  Combined gradient (gradient + Hessian term) will be written out to the following files every %u iterations:\n",combinedgradient_wstride_);
     276           0 :       for(unsigned int i=0; i<combinedgradientOFiles_.size(); i++) {
     277           0 :         log.printf("   coefficient set %u: %s\n",i,combinedgradientOFiles_[i]->getPath().c_str());
     278             :       }
     279             :     }
     280             :   }
     281             :   //
     282             : 
     283          70 :   turnOnHessian();
     284          70 :   checkRead();
     285          70 : }
     286             : 
     287             : 
// Perform one averaged-SGD iteration for coefficient set c_id.
// The combined gradient (if requested) is written out BEFORE the
// coefficients are modified, so it reflects the update direction actually
// applied in this iteration.
void Opt_BachAveragedSGD::coeffsUpdate(const unsigned int c_id) {
  //
  // Optionally output the combined gradient, i.e. the full update direction
  // Gradient + Hessian*(AuxCoeffs - Coeffs). The iteration counter is offset
  // by +1 so the output is labeled with the iteration being completed.
  if(combinedgradientOFiles_.size()>0 && (getIterationCounter()+1)%combinedgradient_wstride_==0) {
    CombinedGradient(c_id).setValues( ( Gradient(c_id) + Hessian(c_id)*(AuxCoeffs(c_id)-Coeffs(c_id)) ) );
    combinedgradient_pntrs_[c_id]->setIterationCounterAndTime(getIterationCounter()+1,getTime());
    combinedgradient_pntrs_[c_id]->writeToFile(*combinedgradientOFiles_[c_id]);
  }
  //
  // Averaging weight: 1/(n+1) for the plain running average, or 1/tau once
  // the exponentially decaying average takes over (see getAverDecay()).
  double aver_decay = getAverDecay();
  // Instantaneous coefficients:
  //   alpha^(n+1) = alpha^(n) - mu*Mask*( Gradient + Hessian*(alpha - alpha_bar) )
  // where Mask allows per-coefficient step-size scaling (0.0 deactivates a coefficient).
  AuxCoeffs(c_id) += - StepSize(c_id)*CoeffsMask(c_id) * ( Gradient(c_id) + Hessian(c_id)*(AuxCoeffs(c_id)-Coeffs(c_id)) );
  //AuxCoeffs() = AuxCoeffs() - StepSize() * ( Gradient() + Hessian()*(AuxCoeffs()-Coeffs()) );
  // Averaged coefficients (used for the bias): incremental running-average update.
  Coeffs(c_id) += aver_decay * ( AuxCoeffs(c_id)-Coeffs(c_id) );
}
     301             : 
     302             : 
     303             : inline
     304             : double Opt_BachAveragedSGD::getAverDecay() const {
     305         670 :   double aver_decay = 1.0 / ( getIterationCounterDbl() + 1.0 );
     306         670 :   if(decaying_aver_tau_ > 0.0 && (getIterationCounterDbl() + 1.0) > decaying_aver_tau_) {
     307          14 :     aver_decay = 1.0 / decaying_aver_tau_;
     308             :   }
     309             :   return aver_decay;
     310             : }
     311             : 
     312             : 
     313             : }
     314        4839 : }

Generated by: LCOV version 1.13