LCOV - code coverage report
Current view: top level - ves - Opt_Adam.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 67 72 93.1 %
Date: 2025-03-25 09:33:27 Functions: 3 4 75.0 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2016-2021 The VES code team
       3             :    (see the PEOPLE-VES file at the root of this folder for a list of names)
       4             : 
       5             :    See http://www.ves-code.org for more information.
       6             : 
       7             :    This file is part of VES code module.
       8             : 
       9             :    The VES code module is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    The VES code module is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with the VES code module.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : 
      23             : #include "Optimizer.h"
      24             : #include "CoeffsVector.h"
      25             : 
      26             : #include "core/ActionRegister.h"
      27             : #include "core/ActionSet.h"
      28             : 
      29             : 
      30             : namespace PLMD {
      31             : namespace ves {
      32             : 
      33             : //+PLUMEDOC VES_OPTIMIZER OPT_ADAM
      34             : /*
      35             : Adaptive moment estimation (ADAM) optimizer.
      36             : 
      37             : \attention
      38             : __This optimizer is still experimental and not fully documented. The syntax might change. Restarting does not work. We recommend using the averaged stochastic gradient descent optimizer (\ref OPT_AVERAGED_SGD) for now__.
      39             : 
      40             : 
      41             : \par Examples
      42             : 
      43             : */
      44             : //+ENDPLUMEDOC
      45             : // ADAM optimizer action: an Optimizer subclass that overrides coeffsUpdate() and keeps running moment estimates of the gradient.
      46             : class Opt_Adam: public Optimizer {
      47             : private:
      48             :   unsigned int time_; // incremented on every coeffsUpdate() call; used as the exponent in the bias correction
      49             :   double beta_1_; // decay factor of the first-moment (mean) running average, keyword BETA_1
      50             :   double beta_2_; // decay factor of the second-moment running average, keyword BETA_2
      51             :   double epsilon_; // keyword EPSILON; added to the square root of the second moment before inverting
      52             :   double one_minus_weight_decay_; // 1 - ADAMW_WEIGHT_DECAY; multiplies the coefficients when adamw_ is set
      53             :   bool amsgrad_; // set by the AMSGRAD flag
      54             :   bool adamw_; // set when ADAMW_WEIGHT_DECAY is given and non-zero
      55             :   // running 2nd moment of the gradient, one CoeffsVector per coefficient set (the 1st moment is kept in the "AuxCoeffs")
      56             :   std::vector<std::unique_ptr<CoeffsVector>> var_coeffs_pntrs_;
      57             :   // element-wise maximum of the 2nd moment seen so far; filled only for the AMSGrad variant
      58             :   std::vector<std::unique_ptr<CoeffsVector>> varmax_coeffs_pntrs_;
      59             : protected:
      60             :   CoeffsVector& VarCoeffs(const unsigned int coeffs_id = 0) const; // accessor into var_coeffs_pntrs_
      61             :   CoeffsVector& VarmaxCoeffs(const unsigned int coeffs_id = 0) const; // accessor into varmax_coeffs_pntrs_
      62             : public:
      63             :   static void registerKeywords(Keywords&);
      64             :   explicit Opt_Adam(const ActionOptions&);
      65             :   void coeffsUpdate(const unsigned int c_id = 0) override; // one optimizer step for coefficient set c_id
      66             : };
      67             : // Returns the second-moment CoeffsVector of set coeffs_id; the index is not range-checked.
      68             : inline
      69             : CoeffsVector& Opt_Adam::VarCoeffs(const unsigned int coeffs_id) const {
      70             :   return *var_coeffs_pntrs_[coeffs_id];
      71             : }
      72             : // Returns the AMSGrad maximum CoeffsVector of set coeffs_id; unchecked, and the vector is only filled when amsgrad_ is set.
      73             : inline
      74             : CoeffsVector& Opt_Adam::VarmaxCoeffs(const unsigned int coeffs_id) const {
      75             :   return *varmax_coeffs_pntrs_[coeffs_id];
      76             : }
      77             : 
      78             : // Ties the Opt_Adam class to the name "OPT_ADAM" (presumably the input-file keyword; macro is defined outside this file).
      79             : PLUMED_REGISTER_ACTION(Opt_Adam,"OPT_ADAM")
      80             : // Declares the keywords accepted by this action: the base Optimizer keywords (fixed step size, multiple walkers,
      81             : // mask, dynamic target distribution) plus the optional BETA_1, BETA_2, EPSILON, ADAMW_WEIGHT_DECAY and the AMSGRAD flag.
      82           4 : void Opt_Adam::registerKeywords(Keywords& keys) {
      83           4 :   Optimizer::registerKeywords(keys);
      84           4 :   Optimizer::useFixedStepSizeKeywords(keys);
      85           4 :   Optimizer::useMultipleWalkersKeywords(keys);
      86           4 :   Optimizer::useMaskKeywords(keys);
      87           4 :   Optimizer::useDynamicTargetDistributionKeywords(keys);
      88           4 :   keys.add("optional","BETA_1","Parameter for the first moment estimate. Defaults to 0.9");
      89           4 :   keys.add("optional","BETA_2","Parameter for the second moment estimate. Defaults to 0.999");
      90           4 :   keys.add("optional","EPSILON","Small parameter to avoid division by zero. Defaults to 1e-8");
      91           4 :   keys.add("optional","ADAMW_WEIGHT_DECAY","Weight decay parameter for the AdamW variant. Defaults to 0");
      92           4 :   keys.addFlag("AMSGRAD", false, "Use the AMSGrad variant");
      93           4 : }
      94             : // Reads the OPT_ADAM keywords, logs the chosen settings and allocates the moment CoeffsVectors (one per coefficient set).
      95             : // Values used when a keyword is absent: BETA_1 0.9, BETA_2 0.999, EPSILON 1e-8, no weight decay, AMSGrad off.
      96           2 : Opt_Adam::Opt_Adam(const ActionOptions&ao):
      97             :   PLUMED_VES_OPTIMIZER_INIT(ao),
      98           2 :   time_(0),
      99           2 :   beta_1_(0.9),
     100           2 :   beta_2_(0.999),
     101           2 :   epsilon_(0.00000001),
     102           2 :   one_minus_weight_decay_(1.0),
     103           2 :   amsgrad_(false),
     104           2 :   adamw_(false),
     105           2 :   var_coeffs_pntrs_(0) {
     106             :   // log which variant is used; NOTE(review): the "see and cite" messages below end without naming a reference
     107           2 :   log << "  Adam type stochastic gradient decent\n";
     108           2 :   parseFlag("AMSGRAD",amsgrad_);
     109           2 :   if (amsgrad_) {
     110           1 :     log << "  Using the AMSGrad variant of the Adam algorithm, see and cite\n";
     111             :   }
     112             :   // a non-zero ADAMW_WEIGHT_DECAY switches on the AdamW variant
     113           2 :   double tmp_weight_decay = 0.0;
     114           2 :   parse("ADAMW_WEIGHT_DECAY",tmp_weight_decay);
     115           2 :   if (tmp_weight_decay != 0.0) {
     116           0 :     adamw_ = true;
     117           0 :     log << "  Using the AdamW variant (Adam with weight decay), see and cite\n";
     118           0 :     one_minus_weight_decay_ = 1 - tmp_weight_decay;
     119           0 :     log << "    weight decay parameter: " << tmp_weight_decay << "\n";
     120             :   }
     121             :   // BETA_1 must be strictly below 1: coeffsUpdate() divides by (1 - beta_1^t), which is zero for beta_1 == 1
     122           2 :   log << "  Adam parameters:\n";
     123           2 :   parse("BETA_1",beta_1_);
     124           2 :   plumed_massert(beta_1_ > 0 && beta_1_ < 1, "BETA_1 must be between 0 and 1");
     125           2 :   log << "    beta_1: " << beta_1_ << "\n";
     126             :   // NOTE(review): BETA_2 == 1 is still accepted; it makes sqrt(1 - beta_2^t) in coeffsUpdate() zero, i.e. a zero step
     127           2 :   parse("BETA_2",beta_2_);
     128           2 :   plumed_massert(beta_2_ > 0 && beta_2_ <= 1, "BETA_2 must be between 0 and 1");
     129           2 :   log << "    beta_2: " << beta_2_ << "\n";
     130             :   // EPSILON must be positive so that 1 / (sqrt(variance) + epsilon) stays finite when the variance is zero
     131           2 :   parse("EPSILON",epsilon_);
     132           2 :   plumed_massert(epsilon_ > 0 && epsilon_ <= 1, "EPSILON must be between 0 and 1");
     133           2 :   log << "    epsilon: " << epsilon_ << "\n";
     134             : 
     135             : 
     136             :   // set up the coeff vector for the 2nd moment of the gradient (variance): a zeroed copy of Coeffs(i) with a new label
     137           4 :   for (unsigned i = 0; i < numberOfCoeffsSets(); ++i) {
     138           2 :     var_coeffs_pntrs_.emplace_back(std::unique_ptr<CoeffsVector>(new CoeffsVector(Coeffs(i))));
     139           4 :     VarCoeffs(i).replaceLabelString("coeffs","grad_var");
     140           2 :     VarCoeffs(i).setAllValuesToZero(); // can Coeffs(i) even be non-zero at this point?
     141             : 
     142             :     // add second set of coefficients to store the maximum values of the 2nd moment (copied from the zeroed VarCoeffs(i))
     143           2 :     if (amsgrad_) {
     144           1 :       varmax_coeffs_pntrs_.emplace_back(std::unique_ptr<CoeffsVector>(new CoeffsVector(VarCoeffs(i))));
     145           2 :       VarmaxCoeffs(i).replaceLabelString("coeffs","grad_varmax"); // NOTE(review): source label was already renamed to "grad_var" - confirm this still matches
     146             :     }
     147             : 
     148             :     // also rename the Coeffs used for the mean of the gradient
     149           4 :     AuxCoeffs(i).replaceLabelString("coeffs","grad_mean");
     150             :   }
     151             :   // all keywords have been parsed at this point
     152           2 :   checkRead();
     153           2 : }
     154             : // Performs one update of coefficient set c_id: refreshes the running first (AuxCoeffs) and second (VarCoeffs) gradient
     155             : // moments, then subtracts StepSize * bias correction * mean / (sqrt(variance) + epsilon_) from Coeffs, restricted by CoeffsMask.
     156          20 : void Opt_Adam::coeffsUpdate(const unsigned int c_id) {
     157          20 :   time_++; // single counter for all c_id values - NOTE(review): confirm this is intended with several coefficient sets
     158             :   // AuxCoeffs is used for first moment (mean); both moments are exponential running averages of the masked gradient
     159          20 :   AuxCoeffs(c_id) *= beta_1_;
     160          20 :   AuxCoeffs(c_id) += (1 - beta_1_ ) * Gradient(c_id) * CoeffsMask(c_id);
     161          20 :   VarCoeffs(c_id) *= beta_2_;
     162          20 :   VarCoeffs(c_id) += (1 - beta_2_ ) * Gradient(c_id) * Gradient(c_id) * CoeffsMask(c_id);
     163             :   // AMSGrad: keep the element-wise maximum of the second moment seen so far
     164          20 :   if (amsgrad_) {
     165         120 :     for (size_t i = 0; i< VarCoeffs(c_id).getSize(); ++i) {
     166         110 :       if (VarCoeffs(c_id).getValue(i) > VarmaxCoeffs(c_id).getValue(i)) {
     167          95 :         VarmaxCoeffs(c_id)[i] = VarCoeffs(c_id).getValue(i);
     168             :       }
     169             :     }
     170             :   }
     171             : 
     172             :   // store sqrt of VarCoeffs in vector, easier than writing a CoeffsVector::sqrt() function
     173             :   // also directly add epsilon_ and invert to multiply with the Coeffs in last step
     174             :   std::vector<double> var_coeffs_sqrt;
     175          20 :   if (!amsgrad_) {
     176         120 :     for (size_t i = 0; i< VarCoeffs(c_id).getSize(); ++i) {
     177         110 :       var_coeffs_sqrt.push_back(1 / (sqrt(VarCoeffs(c_id).getValue(i)) + epsilon_));
     178             :     }
     179             :   } else { // use VarmaxCoeffs instead of VarCoeffs
     180         120 :     for (size_t i = 0; i< VarmaxCoeffs(c_id).getSize(); ++i) {
     181         110 :       var_coeffs_sqrt.push_back(1 / (sqrt(VarmaxCoeffs(c_id).getValue(i)) + epsilon_));
     182             :     }
     183             :   }
     184             : 
     185             :   // bias correction for both moments, folded into the step size
     186          20 :   double scalefactor = StepSize(c_id) * sqrt(1 - pow(beta_2_, time_)) / (1 - pow(beta_1_, time_));
     187             : 
     188          20 :   if (adamw_) { // check is not necessary but probably faster than always multiplying by 1
     189           0 :     Coeffs(c_id) *= one_minus_weight_decay_ * CoeffsMask(c_id); // NOTE(review): a zero mask entry would also zero that coefficient - confirm
     190             :   }
     191             : 
     192             :   // coeff update: var_coeffs_sqrt already holds 1 / (sqrt(variance) + epsilon_)
     193          20 :   Coeffs(c_id) -= scalefactor * AuxCoeffs(c_id) * var_coeffs_sqrt * CoeffsMask(c_id);
     194          20 : }
     195             : 
     196             : 
     197             : }
     198             : }

Generated by: LCOV version 1.16