LCOV - code coverage report
Current view: top level - isdb - EMMI.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 661 871 75.9 %
Date: 2025-03-25 09:33:27 Functions: 31 39 79.5 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2017-2023 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : #include "core/ActionWithValue.h"
      23             : #include "core/ActionAtomistic.h"
      24             : #include "core/ActionWithArguments.h"
      25             : #include "core/ActionRegister.h"
      26             : #include "core/PlumedMain.h"
      27             : #include "tools/Matrix.h"
      28             : #include "tools/Communicator.h"
      29             : #include "core/GenericMolInfo.h"
      30             : #include "core/ActionSet.h"
      31             : #include "tools/File.h"
      32             : #include "tools/Random.h"
      33             : 
      34             : #include <string>
      35             : #include <map>
      36             : #include <numeric>
      37             : #include <ctime>
      38             : 
      39             : namespace PLMD {
      40             : namespace isdb {
      41             : 
      42             : //+PLUMEDOC ISDB_COLVAR EMMI
      43             : /*
      44             : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
      45             : 
      46             : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in  Ref. \cite Hanot113951 .
      47             : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
      48             : 
      49             : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
      50             : GMM_FILE. We are currently working on a web server to perform
      51             : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
      52             : 
      53             : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
      54             : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using
      55             : the Metainference approach \cite Bonomi:2016ip .
      56             : 
      57             : \warning
      58             : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
      59             : 
      60             : \note
      61             : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
      62             : In this case, the user should add the NO_AVER flag to the input line. To use a replica-based enhanced sampling scheme such as
      63             : Parallel-Bias Metadynamics (\ref PBMETAD), one should use the REWEIGHT flag and pass the Metadynamics bias using the ARG keyword.
      64             : 
      65             : \note
      66             : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
      67             : add the NOPBC flag to the input line
      68             : 
      69             : \par Examples
      70             : 
      71             : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
      72             : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
      73             : 
      74             : \plumedfile
      75             : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
      76             :      0  2.9993805e+01   6.54628 10.37820 -0.92988  2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02  1
      77             :      1  2.3468312e+01   6.56095 10.34790 -0.87808  1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02  1
      78             :      ...
      79             : \endplumedfile
      80             : 
      81             : To accelerate the computation of the Bayesian score, one can:
      82             : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
      83             : - calculate the restraint every other step (or more).
      84             : 
      85             : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
      86             : using a GROMACS index file.
      87             : 
      88             : The input file looks as follows:
      89             : 
      90             : \plumedfile
      91             : # include pdb info
      92             : MOLINFO STRUCTURE=prot.pdb
      93             : 
      94             : #  all heavy atoms
      95             : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
      96             : 
      97             : # create EMMI score
      98             : gmm: EMMI NOPBC SIGMA_MIN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
      99             : 
     100             : # translate into bias - apply every 2 steps
     101             : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
     102             : 
     103             : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
     104             : \endplumedfile
     105             : 
     106             : 
     107             : */
     108             : //+ENDPLUMEDOC
     109             : 
     110             : class EMMI :
     111             :   public ActionAtomistic,
     112             :   public ActionWithArguments,
     113             :   public ActionWithValue {
     114             : private:
     115             : 
     116             : // temperature in kbt
     117             :   double kbt_;
     118             : // model GMM - atom types
     119             :   std::vector<unsigned> GMM_m_type_;
     120             : // model GMM - list of atom sigmas - one per atom type
     121             :   std::vector<double> GMM_m_s_;
     122             : // model GMM - list of atom weights - one per atom type
     123             :   std::vector<double> GMM_m_w_;
     124             : // data GMM - means, weights, and covariances + beta option
     125             :   std::vector<Vector>             GMM_d_m_;
     126             :   std::vector<double>             GMM_d_w_;
     127             :   std::vector< VectorGeneric<6> > GMM_d_cov_;
     128             :   std::vector<int>                GMM_d_beta_;
     129             :   std::vector < std::vector<int> >     GMM_d_grps_;
     130             : // overlaps
     131             :   std::vector<double> ovmd_;
     132             :   std::vector<double> ovdd_;
     133             :   std::vector<double> ovmd_ave_;
     134             : // and derivatives
     135             :   std::vector<Vector> ovmd_der_;
     136             :   std::vector<Vector> atom_der_;
     137             :   std::vector<double> GMMid_der_;
     138             : // constants
     139             :   double cfact_;
     140             :   double inv_sqrt2_, sqrt2_pi_;
     141             : // metainference
     142             :   unsigned nrep_;
     143             :   unsigned replica_;
     144             :   std::vector<double> sigma_;
     145             :   std::vector<double> sigma_min_;
     146             :   std::vector<double> sigma_max_;
     147             :   std::vector<double> dsigma_;
     148             : // list of prefactors for overlap between two Gaussians
     149             : // pre_fact = 1.0 / (2pi)**1.5 / std::sqrt(det_md) * Wm * Wd
     150             :   std::vector<double> pre_fact_;
     151             : // inverse of the sum of model and data covariances matrices
     152             :   std::vector< VectorGeneric<6> > inv_cov_md_;
     153             : // neighbor list
     154             :   double   nl_cutoff_;
     155             :   unsigned nl_stride_;
     156             :   bool first_time_;
     157             :   bool no_aver_;
     158             :   std::vector<unsigned> nl_;
     159             : // parallel stuff
     160             :   unsigned size_;
     161             :   unsigned rank_;
     162             : // pbc
     163             :   bool pbc_;
     164             : // Monte Carlo stuff
     165             :   int      MCstride_;
     166             :   double   MCaccept_;
     167             :   double   MCtrials_;
     168             :   Random   random_;
     169             :   // status stuff
     170             :   unsigned int statusstride_;
     171             :   std::string       statusfilename_;
     172             :   OFile        statusfile_;
     173             :   bool         first_status_;
     174             :   // regression
     175             :   unsigned nregres_;
     176             :   double scale_;
     177             :   double scale_min_;
     178             :   double scale_max_;
     179             :   double dscale_;
     180             :   // tabulated exponential
     181             :   double dpcutoff_;
     182             :   double dexp_;
     183             :   unsigned nexp_;
     184             :   std::vector<double> tab_exp_;
     185             :   // simulated annealing
     186             :   unsigned nanneal_;
     187             :   double   kanneal_;
     188             :   double   anneal_;
     189             :   // prior exponent
     190             :   double prior_;
     191             :   // noise type
     192             :   unsigned noise_;
     193             :   // total score and virial;
     194             :   double ene_;
     195             :   Tensor virial_;
     196             :   // model overlap file
     197             :   unsigned int ovstride_;
     198             :   std::string       ovfilename_;
     199             : 
     200             :   // Reweighting additions
     201             :   bool do_reweight_;
     202             :   bool first_time_w_;
     203             :   std::vector<double> forces;
     204             :   std::vector<double> forcesToApply;
     205             : 
     206             :   // average weights
     207             :   double decay_w_;
     208             :   std::vector<double> average_weights_;
     209             : 
     210             : // write file with model overlap
     211             :   void write_model_overlap(long long int step);
     212             : // get median of std::vector
     213             :   double get_median(std::vector<double> &v);
     214             : // annealing
     215             :   double get_annealing(long long int step);
     216             : // do regression
     217             :   double scaleEnergy(double s);
     218             :   double doRegression();
     219             : // read and write status
     220             :   void read_status();
     221             :   void print_status(long long int step);
     222             : // accept or reject
     223             :   bool doAccept(double oldE, double newE, double kbt);
     224             : // do MonteCarlo
     225             :   void doMonteCarlo();
     226             : // read error file
     227             :   std::vector<double> read_exp_errors(const std::string & errfile);
     228             : // read experimental overlaps
     229             :   std::vector<double> read_exp_overlaps(const std::string & ovfile);
     230             : // calculate model GMM weights and covariances
     231             :   std::vector<double> get_GMM_m(std::vector<AtomNumber> &atoms);
     232             : // read data GMM file
     233             :   void get_GMM_d(const std::string & gmm_file);
     234             : // check GMM data
     235             :   void check_GMM_d(const VectorGeneric<6> &cov, double w);
     236             : // auxiliary method
     237             :   void calculate_useful_stuff(double reso);
     238             : // get pref_fact and inv_cov_md
     239             :   double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
     240             :                                 double GMM_w_0, double GMM_w_1,
     241             :                                 VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
     242             : // calculate self overlaps between data GMM components - ovdd_
     243             :   double get_self_overlap(unsigned id);
     244             : // calculate overlap between two Gaussians
     245             :   double get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
     246             :                      const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
     247             : // calculate exponent of overlap for neighbor list update
     248             :   double get_exp_overlap(const Vector &m_m, const Vector &d_m,
     249             :                          const VectorGeneric<6> &inv_cov_md);
     250             : // update the neighbor list
     251             :   void update_neighbor_list();
     252             : // calculate overlap
     253             :   void calculate_overlap();
     254             : // Gaussian noise
     255             :   void calculate_Gauss();
     256             : // Outliers noise
     257             :   void calculate_Outliers();
     258             : // Marginal noise
     259             :   void calculate_Marginal();
     260             : 
     261             :   // See MetainferenceBase
     262             :   void get_weights(double &weight, double &norm, double &neff);
     263             : 
     264             : public:
     265             :   static void registerKeywords( Keywords& keys );
     266             :   explicit EMMI(const ActionOptions&);
     267             :   // needed for reweighting
     268             :   void setDerivatives();
     269             :   void turnOnDerivatives() override;
     270             :   unsigned getNumberOfDerivatives() override;
     271             :   void lockRequests() override;
     272             :   void unlockRequests() override;
     273             :   void calculateNumericalDerivatives( ActionWithValue* a ) override;
     274             :   void apply() override;
     275             :   void setArgDerivatives(Value *v, const double &d);
     276             :   void setAtomsDerivatives(Value*v, const unsigned i, const Vector&d);
     277             :   void setBoxDerivatives(Value*v, const Tensor&d);
     278             : // active methods:
     279             :   void prepare() override;
     280             :   void calculate() override;
     281             : };
     282             : 
     283             : inline
     284          30 : void EMMI::setDerivatives() {
     285             :   // Get appropriate number of derivatives
     286             :   // Derivatives are first for arguments and then for atoms
     287             :   unsigned nder;
     288          30 :   if( getNumberOfAtoms()>0 ) {
     289          30 :     nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
     290             :   } else {
     291           0 :     nder = getNumberOfArguments();
     292             :   }
     293             : 
     294             :   // Resize all derivative arrays
     295          30 :   forces.resize( nder );
     296          30 :   forcesToApply.resize( nder );
     297         146 :   for(int i=0; i<getNumberOfComponents(); ++i) {
     298         116 :     getPntrToComponent(i)->resizeDerivatives(nder);
     299             :   }
     300          30 : }
     301             : 
     302             : inline
     303          22 : void EMMI::turnOnDerivatives() {
     304          22 :   ActionWithValue::turnOnDerivatives();
     305          22 : }
     306             : 
     307             : inline
     308          80 : unsigned EMMI::getNumberOfDerivatives() {
     309          80 :   if( getNumberOfAtoms()>0 ) {
     310          80 :     return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
     311             :   }
     312           0 :   return getNumberOfArguments();
     313             : }
     314             : 
     315             : inline
     316          70 : void EMMI::lockRequests() {
     317             :   ActionAtomistic::lockRequests();
     318             :   ActionWithArguments::lockRequests();
     319          70 : }
     320             : 
     321             : inline
     322          70 : void EMMI::unlockRequests() {
     323             :   ActionAtomistic::unlockRequests();
     324             :   ActionWithArguments::unlockRequests();
     325          70 : }
     326             : 
     327             : inline
     328          10 : void EMMI::calculateNumericalDerivatives( ActionWithValue* a=NULL ) {
     329          10 :   if( getNumberOfArguments()>0 ) {
     330           0 :     ActionWithArguments::calculateNumericalDerivatives( a );
     331             :   }
     332          10 :   if( getNumberOfAtoms()>0 ) {
     333          10 :     Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
     334          44 :     for(int j=0; j<getNumberOfComponents(); ++j) {
     335          34 :       for(unsigned i=0; i<getNumberOfArguments(); ++i)
     336           0 :         if(getPntrToComponent(j)->hasDerivatives()) {
     337           0 :           save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
     338             :         }
     339             :     }
     340          10 :     calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
     341          44 :     for(int j=0; j<getNumberOfComponents(); ++j) {
     342          34 :       for(unsigned i=0; i<getNumberOfArguments(); ++i)
     343           0 :         if(getPntrToComponent(j)->hasDerivatives()) {
     344           0 :           getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
     345             :         }
     346             :     }
     347             :   }
     348          10 : }
     349             : 
     350             : inline
     351          70 : void EMMI::apply() {
     352             :   bool wasforced=false;
     353          70 :   forcesToApply.assign(forcesToApply.size(),0.0);
     354         382 :   for(int i=0; i<getNumberOfComponents(); ++i) {
     355         312 :     if( getPntrToComponent(i)->applyForce( forces ) ) {
     356             :       wasforced=true;
     357           0 :       for(unsigned i=0; i<forces.size(); ++i) {
     358           0 :         forcesToApply[i]+=forces[i];
     359             :       }
     360             :     }
     361             :   }
     362          70 :   if( wasforced ) {
     363           0 :     unsigned ind=0;
     364           0 :     addForcesOnArguments( 0, forcesToApply, ind, getLabel() );
     365           0 :     if( getNumberOfAtoms()>0 ) {
     366           0 :       setForcesOnAtoms( forcesToApply, ind );
     367             :     }
     368             :   }
     369          70 : }
     370             : 
     371             : inline
     372             : void EMMI::setArgDerivatives(Value *v, const double &d) {
     373             :   v->addDerivative(0,d);
     374             : }
     375             : 
     376             : inline
     377    10961336 : void EMMI::setAtomsDerivatives(Value*v, const unsigned i, const Vector&d) {
     378    10961336 :   const unsigned noa=getNumberOfArguments();
     379    10961336 :   v->addDerivative(noa+3*i+0,d[0]);
     380    10961336 :   v->addDerivative(noa+3*i+1,d[1]);
     381    10961336 :   v->addDerivative(noa+3*i+2,d[2]);
     382    10961336 : }
     383             : 
     384             : inline
     385       18220 : void EMMI::setBoxDerivatives(Value* v,const Tensor&d) {
     386       18220 :   const unsigned noa=getNumberOfArguments();
     387             :   const unsigned nat=getNumberOfAtoms();
     388       18220 :   v->addDerivative(noa+3*nat+0,d(0,0));
     389       18220 :   v->addDerivative(noa+3*nat+1,d(0,1));
     390       18220 :   v->addDerivative(noa+3*nat+2,d(0,2));
     391       18220 :   v->addDerivative(noa+3*nat+3,d(1,0));
     392       18220 :   v->addDerivative(noa+3*nat+4,d(1,1));
     393       18220 :   v->addDerivative(noa+3*nat+5,d(1,2));
     394       18220 :   v->addDerivative(noa+3*nat+6,d(2,0));
     395       18220 :   v->addDerivative(noa+3*nat+7,d(2,1));
     396       18220 :   v->addDerivative(noa+3*nat+8,d(2,2));
     397       18220 : }
     398             : 
     399             : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
     400             : 
     401          32 : void EMMI::registerKeywords( Keywords& keys ) {
     402          32 :   Action::registerKeywords( keys );
     403          32 :   ActionAtomistic::registerKeywords( keys );
     404          32 :   ActionWithValue::registerKeywords( keys );
     405          32 :   ActionWithArguments::registerKeywords( keys );
     406          64 :   keys.addInputKeyword("optional","ARG","scalar","the labels of the values from which the function is calculated");
     407          32 :   keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
     408          32 :   keys.addFlag("NOPBC",false,"ignore the periodic boundary conditions when calculating distances");
     409          32 :   keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
     410          32 :   keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
     411          32 :   keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
     412          32 :   keys.add("compulsory","SIGMA_MIN","minimum uncertainty");
     413          32 :   keys.add("compulsory","RESOLUTION", "Cryo-EM map resolution");
     414          32 :   keys.add("compulsory","NOISETYPE","functional form of the noise (GAUSS, OUTLIERS, MARGINAL)");
     415          32 :   keys.add("optional","SIGMA0","initial value of the uncertainty");
     416          32 :   keys.add("optional","DSIGMA","MC step for uncertainties");
     417          32 :   keys.add("optional","MC_STRIDE", "Monte Carlo stride");
     418          32 :   keys.add("optional","ERR_FILE","file with experimental or GMM fit errors");
     419          32 :   keys.add("optional","OV_FILE","file with experimental overlaps");
     420          32 :   keys.add("optional","NORM_DENSITY","integral of the experimental density");
     421          32 :   keys.add("optional","STATUS_FILE","write a file with all the data useful for restart");
     422          32 :   keys.add("optional","WRITE_STRIDE","write the status to a file every N steps, this can be used for restart");
     423          32 :   keys.add("optional","REGRESSION","regression stride");
     424          32 :   keys.add("optional","REG_SCALE_MIN","regression minimum scale");
     425          32 :   keys.add("optional","REG_SCALE_MAX","regression maximum scale");
     426          32 :   keys.add("optional","REG_DSCALE","regression maximum scale MC move");
     427          32 :   keys.add("optional","SCALE","scale factor");
     428          32 :   keys.add("optional","ANNEAL", "Length of annealing cycle");
     429          32 :   keys.add("optional","ANNEAL_FACT", "Annealing temperature factor");
     430          32 :   keys.add("optional","TEMP","temperature");
     431          32 :   keys.add("optional","PRIOR", "exponent of uncertainty prior");
     432          32 :   keys.add("optional","WRITE_OV_STRIDE","write model overlaps every N steps");
     433          32 :   keys.add("optional","WRITE_OV","write a file with model overlaps");
     434          32 :   keys.add("optional","AVERAGING", "Averaging window for weights");
     435          32 :   keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
     436          32 :   keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the ARG as energy");
     437          64 :   keys.addOutputComponent("scoreb","default","scalar","Bayesian score");
     438          64 :   keys.addOutputComponent("acc",   "NOISETYPE","scalar","MC acceptance for uncertainty");
     439          64 :   keys.addOutputComponent("scale", "REGRESSION","scalar","scale factor");
     440          64 :   keys.addOutputComponent("accscale", "REGRESSION","scalar","MC acceptance for scale regression");
     441          64 :   keys.addOutputComponent("enescale", "REGRESSION","scalar","MC energy for scale regression");
     442          64 :   keys.addOutputComponent("anneal","ANNEAL","scalar","annealing factor");
     443          64 :   keys.addOutputComponent("weight",       "REWEIGHT","scalar",     "weights of the weighted average");
     444          64 :   keys.addOutputComponent("biasDer",      "REWEIGHT","scalar",     "derivatives with respect to the bias");
     445          64 :   keys.addOutputComponent("sigma",      "NOISETYPE","scalar",     "uncertainty in the forward models and experiment");
     446          64 :   keys.addOutputComponent("neff",         "default","scalar",      "effective number of replicas");
     447          32 : }
     448             : 
     449          30 : EMMI::EMMI(const ActionOptions&ao):
     450             :   Action(ao),
     451             :   ActionAtomistic(ao),
     452             :   ActionWithArguments(ao),
     453             :   ActionWithValue(ao),
     454          30 :   inv_sqrt2_(0.707106781186548),
     455          30 :   sqrt2_pi_(0.797884560802865),
     456          30 :   first_time_(true), no_aver_(false), pbc_(true),
     457          30 :   MCstride_(1), MCaccept_(0.), MCtrials_(0.),
     458          60 :   statusstride_(0), first_status_(true),
     459          30 :   nregres_(0), scale_(1.),
     460          30 :   dpcutoff_(15.0), nexp_(1000000), nanneal_(0),
     461          60 :   kanneal_(0.), anneal_(1.), prior_(1.), ovstride_(0),
     462          30 :   do_reweight_(false), first_time_w_(true), decay_w_(1.) {
     463             :   // periodic boundary conditions
     464          30 :   bool nopbc=!pbc_;
     465          30 :   parseFlag("NOPBC",nopbc);
     466          30 :   pbc_=!nopbc;
     467             : 
     468             :   // list of atoms
     469             :   std::vector<AtomNumber> atoms;
     470          60 :   parseAtomList("ATOMS", atoms);
     471             : 
     472             :   // file with data GMM
     473             :   std::string GMM_file;
     474          60 :   parse("GMM_FILE", GMM_file);
     475             : 
     476             :   // type of data noise
     477             :   std::string noise;
     478          60 :   parse("NOISETYPE",noise);
     479          30 :   if      (noise=="GAUSS") {
     480          18 :     noise_ = 0;
     481          12 :   } else if(noise=="OUTLIERS") {
     482           6 :     noise_ = 1;
     483           6 :   } else if(noise=="MARGINAL") {
     484           6 :     noise_ = 2;
     485             :   } else {
     486           0 :     error("Unknown noise type!");
     487             :   }
     488             : 
     489             :   // minimum value for error
     490             :   double sigma_min;
     491          30 :   parse("SIGMA_MIN", sigma_min);
     492          30 :   if(sigma_min<0) {
     493           0 :     error("SIGMA_MIN should be greater or equal to zero");
     494             :   }
     495             : 
     496             :   // the following parameters must be specified with noise type 0 and 1
     497             :   double sigma_ini, dsigma;
     498          30 :   if(noise_!=2) {
     499             :     // initial value of the uncertainty
     500          24 :     parse("SIGMA0", sigma_ini);
     501          24 :     if(sigma_ini<=0) {
     502           0 :       error("you must specify a positive SIGMA0");
     503             :     }
     504             :     // MC parameters
     505          24 :     parse("DSIGMA", dsigma);
     506          24 :     if(dsigma<0) {
     507           0 :       error("you must specify a positive DSIGMA");
     508             :     }
     509          24 :     parse("MC_STRIDE", MCstride_);
     510          24 :     if(dsigma>0 && MCstride_<=0) {
     511           0 :       error("you must specify a positive MC_STRIDE");
     512             :     }
     513             :     // status file parameters
     514          24 :     parse("WRITE_STRIDE", statusstride_);
     515          24 :     if(statusstride_==0) {
     516           0 :       error("you must specify a positive WRITE_STRIDE");
     517             :     }
     518          48 :     parse("STATUS_FILE",  statusfilename_);
     519          24 :     if(statusfilename_=="") {
     520          48 :       statusfilename_ = "MISTATUS"+getLabel();
     521             :     } else {
     522           0 :       statusfilename_ = statusfilename_+getLabel();
     523             :     }
     524             :   }
     525             : 
     526             :   // error file
     527             :   std::string errfile;
     528          60 :   parse("ERR_FILE", errfile);
     529             : 
     530             :   // file with experimental overlaps
     531             :   std::string ovfile;
     532          30 :   parse("OV_FILE", ovfile);
     533             : 
     534             :   // integral of the experimetal density
     535          30 :   double norm_d = 0.0;
     536          30 :   parse("NORM_DENSITY", norm_d);
     537             : 
     538             :   // temperature
     539          30 :   kbt_ = getkBT();
     540             : 
     541             :   // exponent of uncertainty prior
     542          30 :   parse("PRIOR",prior_);
     543             : 
     544             :   // simulated annealing stuff
     545          30 :   parse("ANNEAL", nanneal_);
     546          30 :   parse("ANNEAL_FACT", kanneal_);
     547          30 :   if(nanneal_>0 && kanneal_<=1.0) {
     548           0 :     error("with ANNEAL, ANNEAL_FACT must be greater than 1");
     549             :   }
     550             : 
     551             :   // regression stride
     552          30 :   parse("REGRESSION",nregres_);
     553             :   // other regression parameters
     554          30 :   if(nregres_>0) {
     555           0 :     parse("REG_SCALE_MIN",scale_min_);
     556           0 :     parse("REG_SCALE_MAX",scale_max_);
     557           0 :     parse("REG_DSCALE",dscale_);
     558             :     // checks
     559           0 :     if(scale_max_<=scale_min_) {
     560           0 :       error("with REGRESSION, REG_SCALE_MAX must be greater than REG_SCALE_MIN");
     561             :     }
     562           0 :     if(dscale_<=0.) {
     563           0 :       error("with REGRESSION, REG_DSCALE must be positive");
     564             :     }
     565             :   }
     566             : 
     567             :   // scale factor
     568          30 :   parse("SCALE", scale_);
     569             : 
     570             :   // read map resolution
     571             :   double reso;
     572          30 :   parse("RESOLUTION", reso);
     573          30 :   if(reso<=0.) {
     574           0 :     error("RESOLUTION should be strictly positive");
     575             :   }
     576             : 
     577             :   // neighbor list stuff
     578          30 :   parse("NL_CUTOFF",nl_cutoff_);
     579          30 :   if(nl_cutoff_<=0.0) {
     580           0 :     error("NL_CUTOFF should be explicitly specified and positive");
     581             :   }
     582          30 :   parse("NL_STRIDE",nl_stride_);
     583          30 :   if(nl_stride_==0) {
     584           0 :     error("NL_STRIDE should be explicitly specified and positive");
     585             :   }
     586             : 
     587             :   // averaging or not
     588          30 :   parseFlag("NO_AVER",no_aver_);
     589             : 
     590             :   // write overlap file
     591          30 :   parse("WRITE_OV_STRIDE", ovstride_);
     592          30 :   parse("WRITE_OV", ovfilename_);
     593          32 :   if(ovstride_>0 && ovfilename_=="") {
     594           0 :     error("With WRITE_OV_STRIDE you must specify WRITE_OV");
     595             :   }
     596             : 
     597             :   // set parallel stuff
     598          30 :   size_=comm.Get_size();
     599          30 :   rank_=comm.Get_rank();
     600             : 
     601             :   // get number of replicas
     602          30 :   if(rank_==0) {
     603          18 :     if(no_aver_) {
     604          16 :       nrep_ = 1;
     605             :     } else {
     606           2 :       nrep_ = multi_sim_comm.Get_size();
     607             :     }
     608          18 :     replica_ = multi_sim_comm.Get_rank();
     609             :   } else {
     610          12 :     nrep_ = 0;
     611          12 :     replica_ = 0;
     612             :   }
     613          30 :   comm.Sum(&nrep_,1);
     614          30 :   comm.Sum(&replica_,1);
     615             : 
     616             :   // Reweighting flag
     617          30 :   parseFlag("REWEIGHT", do_reweight_);
     618          30 :   if(do_reweight_&&getNumberOfArguments()!=1) {
     619           0 :     error("To REWEIGHT one must provide one single bias as an argument");
     620             :   }
     621          30 :   if(do_reweight_&&no_aver_) {
     622           0 :     error("REWEIGHT cannot be used with NO_AVER");
     623             :   }
     624          30 :   if(do_reweight_&&nrep_<2) {
     625           0 :     error("REWEIGHT can only be used in parallel with 2 or more replicas");
     626             :   }
     627          30 :   if(!getRestart()) {
     628          30 :     average_weights_.resize(nrep_, 1./static_cast<double>(nrep_));
     629             :   } else {
     630           0 :     average_weights_.resize(nrep_, 0.);
     631             :   }
     632             : 
     633          30 :   unsigned averaging=0;
     634          30 :   parse("AVERAGING", averaging);
     635          30 :   if(averaging>0) {
     636           2 :     decay_w_ = 1./static_cast<double> (averaging);
     637             :   }
     638             : 
     639          30 :   checkRead();
     640             : 
     641          30 :   log.printf("  atoms involved : ");
     642       16906 :   for(unsigned i=0; i<atoms.size(); ++i) {
     643       16876 :     log.printf("%d ",atoms[i].serial());
     644             :   }
     645          30 :   log.printf("\n");
     646          30 :   log.printf("  GMM data file : %s\n", GMM_file.c_str());
     647          30 :   if(no_aver_) {
     648          28 :     log.printf("  without ensemble averaging\n");
     649             :   }
     650          30 :   log.printf("  type of data noise : %s\n", noise.c_str());
     651          30 :   log.printf("  neighbor list cutoff : %lf\n", nl_cutoff_);
     652          30 :   log.printf("  neighbor list stride : %u\n",  nl_stride_);
     653          30 :   log.printf("  minimum uncertainty : %f\n",sigma_min);
     654          30 :   log.printf("  scale factor : %lf\n",scale_);
     655          30 :   if(nregres_>0) {
     656           0 :     log.printf("  regression stride : %u\n", nregres_);
     657           0 :     log.printf("  regression minimum scale : %lf\n", scale_min_);
     658           0 :     log.printf("  regression maximum scale : %lf\n", scale_max_);
     659           0 :     log.printf("  regression maximum scale MC move : %lf\n", dscale_);
     660             :   }
     661          30 :   if(noise_!=2) {
     662          24 :     log.printf("  initial value of the uncertainty : %f\n",sigma_ini);
     663          24 :     log.printf("  max MC move in uncertainty : %f\n",dsigma);
     664          24 :     log.printf("  MC stride : %u\n", MCstride_);
     665          24 :     log.printf("  reading/writing to status file : %s\n",statusfilename_.c_str());
     666          24 :     log.printf("  with stride : %u\n",statusstride_);
     667             :   }
     668          30 :   if(errfile.size()>0) {
     669           0 :     log.printf("  reading experimental errors from file : %s\n", errfile.c_str());
     670             :   }
     671          30 :   if(ovfile.size()>0) {
     672           0 :     log.printf("  reading experimental overlaps from file : %s\n", ovfile.c_str());
     673             :   }
     674          30 :   log.printf("  temperature of the system in energy unit : %f\n",kbt_);
     675          30 :   log.printf("  prior exponent : %f\n",prior_);
     676          30 :   log.printf("  number of replicas for averaging: %u\n",nrep_);
     677          30 :   log.printf("  id of the replica : %u\n",replica_);
     678          30 :   if(nanneal_>0) {
     679           4 :     log.printf("  length of annealing cycle : %u\n",nanneal_);
     680           4 :     log.printf("  annealing factor : %f\n",kanneal_);
     681             :   }
     682          30 :   if(ovstride_>0) {
     683           2 :     log.printf("  stride for writing model overlaps : %u\n",ovstride_);
     684           2 :     log.printf("  file for writing model overlaps : %s\n", ovfilename_.c_str());
     685             :   }
     686             : 
     687             :   // set constant quantity before calculating stuff
     688          30 :   cfact_ = 1.0/pow( 2.0*pi, 1.5 );
     689             : 
     690             :   // calculate model GMM constant parameters
     691          30 :   std::vector<double> GMM_m_w = get_GMM_m(atoms);
     692             : 
     693             :   // read data GMM parameters
     694          30 :   get_GMM_d(GMM_file);
     695          30 :   log.printf("  number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
     696             : 
     697             :   // normalize atom weight map
     698          30 :   if(norm_d <= 0.0) {
     699          30 :     norm_d = accumulate(GMM_d_w_.begin(), GMM_d_w_.end(), 0.0);
     700             :   }
     701             :   double norm_m = accumulate(GMM_m_w.begin(),  GMM_m_w.end(),  0.0);
     702             :   // renormalization
     703         150 :   for(unsigned i=0; i<GMM_m_w_.size(); ++i) {
     704         120 :     GMM_m_w_[i] *= norm_d / norm_m;
     705             :   }
     706             : 
     707             :   // read experimental errors
     708             :   std::vector<double> exp_err;
     709          30 :   if(errfile.size()>0) {
     710           0 :     exp_err = read_exp_errors(errfile);
     711             :   }
     712             : 
     713             :   // get self overlaps between data GMM components
     714          30 :   if(ovfile.size()>0) {
     715           0 :     ovdd_ = read_exp_overlaps(ovfile);
     716             :   } else {
     717        2082 :     for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
     718        2052 :       double ov = get_self_overlap(i);
     719        2052 :       ovdd_.push_back(ov);
     720             :     }
     721             :   }
     722             : 
     723          30 :   log.printf("  number of GMM groups : %u\n", static_cast<unsigned>(GMM_d_grps_.size()));
     724             :   // cycle on GMM groups
     725          60 :   for(unsigned Gid=0; Gid<GMM_d_grps_.size(); ++Gid) {
     726          30 :     log.printf("    group %d\n", Gid);
     727             :     // calculate median overlap and experimental error
     728             :     std::vector<double> ovdd;
     729             :     std::vector<double> err;
     730             :     // cycle on the group members
     731        2082 :     for(unsigned i=0; i<GMM_d_grps_[Gid].size(); ++i) {
     732             :       // GMM id
     733        2052 :       int GMMid = GMM_d_grps_[Gid][i];
     734             :       // add to experimental error
     735        2052 :       if(errfile.size()>0) {
     736           0 :         err.push_back(exp_err[GMMid]);
     737             :       } else {
     738        2052 :         err.push_back(0.);
     739             :       }
     740             :       // add to GMM overlap
     741        2052 :       ovdd.push_back(ovdd_[GMMid]);
     742             :     }
     743             :     // calculate median quantities
     744          30 :     double ovdd_m = get_median(ovdd);
     745          30 :     double err_m  = get_median(err);
     746             :     // print out statistics
     747          30 :     log.printf("     # of members : %zu\n", GMM_d_grps_[Gid].size());
     748          30 :     log.printf("     median overlap : %lf\n", ovdd_m);
     749          30 :     log.printf("     median error : %lf\n", err_m);
     750             :     // add minimum value of sigma for this group of GMMs
     751          30 :     sigma_min_.push_back(std::sqrt(err_m*err_m+sigma_min*ovdd_m*sigma_min*ovdd_m));
     752             :     // these are only needed with Gaussian and Outliers noise models
     753          30 :     if(noise_!=2) {
     754             :       // set dsigma
     755          24 :       dsigma_.push_back(dsigma * ovdd_m);
     756             :       // set sigma max
     757          24 :       sigma_max_.push_back(10.0*ovdd_m + sigma_min_[Gid] + dsigma_[Gid]);
     758             :       // initialize sigma
     759          48 :       sigma_.push_back(std::max(sigma_min_[Gid],std::min(sigma_ini*ovdd_m,sigma_max_[Gid])));
     760             :     }
     761             :   }
     762             : 
     763             :   // read status file if restarting
     764          30 :   if(getRestart() && noise_!=2) {
     765           0 :     read_status();
     766             :   }
     767             : 
     768             :   // calculate auxiliary stuff
     769          30 :   calculate_useful_stuff(reso);
     770             : 
     771             :   // prepare data and derivative std::vectors
     772          30 :   ovmd_.resize(ovdd_.size());
     773          30 :   ovmd_ave_.resize(ovdd_.size());
     774          30 :   atom_der_.resize(GMM_m_type_.size());
     775          30 :   GMMid_der_.resize(ovdd_.size());
     776             : 
     777             :   // clear things that are no longer needed
     778             :   GMM_d_cov_.clear();
     779             : 
     780             :   // add components
     781          60 :   addComponentWithDerivatives("scoreb");
     782          30 :   componentIsNotPeriodic("scoreb");
     783             : 
     784          30 :   if(noise_!=2) {
     785          48 :     addComponent("acc");
     786          48 :     componentIsNotPeriodic("acc");
     787             :   }
     788             : 
     789          30 :   if(nregres_>0) {
     790           0 :     addComponent("scale");
     791           0 :     componentIsNotPeriodic("scale");
     792           0 :     addComponent("accscale");
     793           0 :     componentIsNotPeriodic("accscale");
     794           0 :     addComponent("enescale");
     795           0 :     componentIsNotPeriodic("enescale");
     796             :   }
     797             : 
     798          30 :   if(nanneal_>0) {
     799           8 :     addComponent("anneal");
     800           8 :     componentIsNotPeriodic("anneal");
     801             :   }
     802             : 
     803          30 :   if(do_reweight_) {
     804           4 :     addComponent("biasDer");
     805           4 :     componentIsNotPeriodic("biasDer");
     806           4 :     addComponent("weight");
     807           4 :     componentIsNotPeriodic("weight");
     808             :   }
     809             : 
     810          60 :   addComponent("neff");
     811          30 :   componentIsNotPeriodic("neff");
     812             : 
     813          54 :   for(unsigned i=0; i<sigma_.size(); ++i) {
     814             :     std::string num;
     815          24 :     Tools::convert(i, num);
     816          48 :     addComponent("sigma-"+num);
     817          24 :     componentIsNotPeriodic("sigma-"+num);
     818          48 :     getPntrToComponent("sigma-"+num)->set(sigma_[i]);
     819             :   }
     820             : 
     821             :   // initialize random seed
     822             :   unsigned iseed;
     823          30 :   if(rank_==0) {
     824          18 :     iseed = time(NULL)+replica_;
     825             :   } else {
     826          12 :     iseed = 0;
     827             :   }
     828          30 :   comm.Sum(&iseed, 1);
     829          30 :   random_.setSeed(-iseed);
     830             : 
     831             :   // request the atoms
     832          30 :   requestAtoms(atoms);
     833          30 :   setDerivatives();
     834             : 
     835             :   // print bibliography
     836          60 :   log<<"  Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
     837          60 :   log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
     838          60 :   log<<plumed.cite("Bonomi, Pellarin, Vendruscolo, Biophys. J. 114, 1604 (2018)");
     839          30 :   if(do_reweight_) {
     840           4 :     log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
     841             :   }
     842          30 :   if(!no_aver_ && nrep_>1) {
     843           4 :     log<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
     844             :   }
     845          30 :   log<<"\n";
     846          30 : }
     847             : 
     848           4 : void EMMI::write_model_overlap(long long int step) {
     849           4 :   OFile ovfile;
     850           4 :   ovfile.link(*this);
     851             :   std::string num;
     852           4 :   Tools::convert(step,num);
     853           4 :   std::string name = ovfilename_+"-"+num;
     854           4 :   ovfile.open(name);
     855             :   ovfile.setHeavyFlush();
     856           4 :   ovfile.fmtField("%10.7e ");
     857             : // write overlaps
     858          40 :   for(int i=0; i<ovmd_.size(); ++i) {
     859          36 :     ovfile.printField("Model", ovmd_[i]);
     860          72 :     ovfile.printField("ModelScaled", scale_ * ovmd_[i]);
     861          36 :     ovfile.printField("Data", ovdd_[i]);
     862          36 :     ovfile.printField();
     863             :   }
     864           4 :   ovfile.close();
     865           4 : }
     866             : 
     867          60 : double EMMI::get_median(std::vector<double> &v) {
     868             : // dimension of std::vector
     869          60 :   unsigned size = v.size();
     870             : // in case of only one entry
     871          60 :   if (size==1) {
     872           0 :     return v[0];
     873             :   } else {
     874             :     // reorder std::vector
     875          60 :     sort(v.begin(), v.end());
     876             :     // odd or even?
     877          60 :     if (size%2==0) {
     878           4 :       return (v[size/2-1]+v[size/2])/2.0;
     879             :     } else {
     880          56 :       return v[size/2];
     881             :     }
     882             :   }
     883             : }
     884             : 
     885           0 : void EMMI::read_status() {
     886             :   double MDtime;
     887             : // open file
     888             :   auto ifile = Tools::make_unique<IFile>();
     889           0 :   ifile->link(*this);
     890           0 :   if(ifile->FileExist(statusfilename_)) {
     891           0 :     ifile->open(statusfilename_);
     892           0 :     while(ifile->scanField("MD_time", MDtime)) {
     893           0 :       for(unsigned i=0; i<sigma_.size(); ++i) {
     894             :         // convert i to std::string
     895             :         std::string num;
     896           0 :         Tools::convert(i,num);
     897             :         // read entries
     898           0 :         ifile->scanField("s"+num, sigma_[i]);
     899             :       }
     900             :       // new line
     901           0 :       ifile->scanField();
     902             :     }
     903           0 :     ifile->close();
     904             :   } else {
     905           0 :     error("Cannot find status file "+statusfilename_+"\n");
     906             :   }
     907           0 : }
     908             : 
     909       12729 : void EMMI::print_status(long long int step) {
     910             : // if first time open the file
     911       12729 :   if(first_status_) {
     912          24 :     first_status_ = false;
     913          24 :     statusfile_.link(*this);
     914          24 :     statusfile_.open(statusfilename_);
     915             :     statusfile_.setHeavyFlush();
     916          48 :     statusfile_.fmtField("%6.3e ");
     917             :   }
     918             : // write fields
     919       12729 :   double MDtime = static_cast<double>(step)*getTimeStep();
     920       12729 :   statusfile_.printField("MD_time", MDtime);
     921       25458 :   for(unsigned i=0; i<sigma_.size(); ++i) {
     922             :     // convert i to std::string
     923             :     std::string num;
     924       12729 :     Tools::convert(i,num);
     925             :     // print entry
     926       25458 :     statusfile_.printField("s"+num, sigma_[i]);
     927             :   }
     928       12729 :   statusfile_.printField();
     929       12729 : }
     930             : 
     931           0 : bool EMMI::doAccept(double oldE, double newE, double kbt) {
     932             :   bool accept = false;
     933             :   // calculate delta energy
     934           0 :   double delta = ( newE - oldE ) / kbt;
     935             :   // if delta is negative always accept move
     936           0 :   if( delta < 0.0 ) {
     937             :     accept = true;
     938             :   } else {
     939             :     // otherwise extract random number
     940           0 :     double s = random_.RandU01();
     941           0 :     if( s < std::exp(-delta) ) {
     942             :       accept = true;
     943             :     }
     944             :   }
     945           0 :   return accept;
     946             : }
     947             : 
     948           0 : void EMMI::doMonteCarlo() {
     949             :   // extract random GMM group
     950           0 :   unsigned nGMM = static_cast<unsigned>(std::floor(random_.RandU01()*static_cast<double>(GMM_d_grps_.size())));
     951           0 :   if(nGMM==GMM_d_grps_.size()) {
     952           0 :     nGMM -= 1;
     953             :   }
     954             : 
     955             :   // generate random move
     956           0 :   double shift = dsigma_[nGMM] * ( 2.0 * random_.RandU01() - 1.0 );
     957             :   // new sigma
     958           0 :   double new_s = sigma_[nGMM] + shift;
     959             :   // check boundaries
     960           0 :   if(new_s > sigma_max_[nGMM]) {
     961           0 :     new_s = 2.0 * sigma_max_[nGMM] - new_s;
     962             :   }
     963           0 :   if(new_s < sigma_min_[nGMM]) {
     964           0 :     new_s = 2.0 * sigma_min_[nGMM] - new_s;
     965             :   }
     966             :   // old s2
     967           0 :   double old_inv_s2 = 1.0 / sigma_[nGMM] / sigma_[nGMM];
     968             :   // new s2
     969           0 :   double new_inv_s2 = 1.0 / new_s / new_s;
     970             : 
     971             :   // cycle on GMM group and calculate old and new energy
     972             :   double old_ene = 0.0;
     973             :   double new_ene = 0.0;
     974           0 :   double ng = static_cast<double>(GMM_d_grps_[nGMM].size());
     975             : 
     976             :   // in case of Gaussian noise
     977           0 :   if(noise_==0) {
     978             :     double chi2 = 0.0;
     979           0 :     for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
     980             :       // id GMM component
     981           0 :       int GMMid = GMM_d_grps_[nGMM][i];
     982             :       // deviation
     983           0 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
     984             :       // add to chi2
     985           0 :       chi2 += dev * dev;
     986             :     }
     987             :     // final energy calculation: add normalization and prior
     988           0 :     old_ene = 0.5 * kbt_ * ( chi2 * old_inv_s2 - (ng+prior_) * std::log(old_inv_s2) );
     989           0 :     new_ene = 0.5 * kbt_ * ( chi2 * new_inv_s2 - (ng+prior_) * std::log(new_inv_s2) );
     990             :   }
     991             : 
     992             :   // in case of Outliers noise
     993           0 :   if(noise_==1) {
     994           0 :     for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
     995             :       // id GMM component
     996           0 :       int GMMid = GMM_d_grps_[nGMM][i];
     997             :       // calculate deviation
     998           0 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
     999             :       // add to energies
    1000           0 :       old_ene += std::log1p( 0.5 * dev * dev * old_inv_s2);
    1001           0 :       new_ene += std::log1p( 0.5 * dev * dev * new_inv_s2);
    1002             :     }
    1003             :     // final energy calculation: add normalization and prior
    1004           0 :     old_ene = kbt_ * ( old_ene + (ng+prior_) * std::log(sigma_[nGMM]) );
    1005           0 :     new_ene = kbt_ * ( new_ene + (ng+prior_) * std::log(new_s) );
    1006             :   }
    1007             : 
    1008             :   // increment number of trials
    1009           0 :   MCtrials_ += 1.0;
    1010             : 
    1011             :   // accept or reject
    1012           0 :   bool accept = doAccept(old_ene/anneal_, new_ene/anneal_, kbt_);
    1013           0 :   if(accept) {
    1014           0 :     sigma_[nGMM] = new_s;
    1015           0 :     MCaccept_ += 1.0;
    1016             :   }
    1017             :   // local communication
    1018           0 :   if(rank_!=0) {
    1019           0 :     for(unsigned i=0; i<sigma_.size(); ++i) {
    1020           0 :       sigma_[i] = 0.0;
    1021             :     }
    1022           0 :     MCaccept_ = 0.0;
    1023             :   }
    1024           0 :   if(size_>1) {
    1025           0 :     comm.Sum(&sigma_[0], sigma_.size());
    1026           0 :     comm.Sum(&MCaccept_, 1);
    1027             :   }
    1028             : 
    1029             :   // update sigma output
    1030             :   std::string num;
    1031           0 :   Tools::convert(nGMM, num);
    1032           0 :   getPntrToComponent("sigma-"+num)->set(sigma_[nGMM]);
    1033           0 : }
    1034             : 
    1035           0 : std::vector<double> EMMI::read_exp_errors(const std::string & errfile) {
    1036             :   int nexp, idcomp;
    1037             :   double err;
    1038             :   std::vector<double> exp_err;
    1039             : // open file
    1040           0 :   IFile *ifile = new IFile();
    1041           0 :   if(ifile->FileExist(errfile)) {
    1042           0 :     ifile->open(errfile);
    1043             :     // scan for number of experimental errors
    1044           0 :     ifile->scanField("Nexp", nexp);
    1045             :     // cycle on GMM components
    1046           0 :     while(ifile->scanField("Id",idcomp)) {
    1047             :       // total experimental error
    1048           0 :       double err_tot = 0.0;
    1049             :       // cycle on number of experimental overlaps
    1050           0 :       for(unsigned i=0; i<nexp; ++i) {
    1051             :         std::string ss;
    1052           0 :         Tools::convert(i,ss);
    1053           0 :         ifile->scanField("Err"+ss, err);
    1054             :         // add to total error
    1055           0 :         err_tot += err*err;
    1056             :       }
    1057             :       // new line
    1058           0 :       ifile->scanField();
    1059             :       // calculate RMSE
    1060           0 :       err_tot = std::sqrt(err_tot/static_cast<double>(nexp));
    1061             :       // add to global
    1062           0 :       exp_err.push_back(err_tot);
    1063             :     }
    1064           0 :     ifile->close();
    1065             :   } else {
    1066           0 :     error("Cannot find ERR_FILE "+errfile+"\n");
    1067             :   }
    1068           0 :   return exp_err;
    1069             : }
    1070             : 
    1071           0 : std::vector<double> EMMI::read_exp_overlaps(const std::string & ovfile) {
    1072             :   int idcomp;
    1073             :   double ov;
    1074             :   std::vector<double> ovdd;
    1075             : // open file
    1076           0 :   IFile *ifile = new IFile();
    1077           0 :   if(ifile->FileExist(ovfile)) {
    1078           0 :     ifile->open(ovfile);
    1079             :     // cycle on GMM components
    1080           0 :     while(ifile->scanField("Id",idcomp)) {
    1081             :       // read experimental overlap
    1082           0 :       ifile->scanField("Overlap", ov);
    1083             :       // add to ovdd
    1084           0 :       ovdd.push_back(ov);
    1085             :       // new line
    1086           0 :       ifile->scanField();
    1087             :     }
    1088           0 :     ifile->close();
    1089             :   } else {
    1090           0 :     error("Cannot find OV_FILE "+ovfile+"\n");
    1091             :   }
    1092           0 :   return ovdd;
    1093             : }
    1094             : 
    1095          30 : std::vector<double> EMMI::get_GMM_m(std::vector<AtomNumber> &atoms) {
    1096             :   // list of weights - one per atom
    1097             :   std::vector<double> GMM_m_w;
    1098             : 
    1099          30 :   auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
    1100             :   // map of atom types to A and B coefficients of scattering factor
    1101             :   // f(s) = A * exp(-B*s**2)
    1102             :   // B is in Angstrom squared
    1103             :   // map between an atom type and an index
    1104             :   std::map<std::string, unsigned> type_map;
    1105          30 :   type_map["C"]=0;
    1106          30 :   type_map["O"]=1;
    1107          30 :   type_map["N"]=2;
    1108          30 :   type_map["S"]=3;
    1109             :   // fill in sigma std::vector
    1110          30 :   GMM_m_s_.push_back(0.01*15.146);  // type 0
    1111          30 :   GMM_m_s_.push_back(0.01*8.59722); // type 1
    1112          30 :   GMM_m_s_.push_back(0.01*11.1116); // type 2
    1113          30 :   GMM_m_s_.push_back(0.01*15.8952); // type 3
    1114             :   // fill in weight std::vector
    1115          30 :   GMM_m_w_.push_back(2.49982); // type 0
    1116          30 :   GMM_m_w_.push_back(1.97692); // type 1
    1117          30 :   GMM_m_w_.push_back(2.20402); // type 2
    1118          30 :   GMM_m_w_.push_back(5.14099); // type 3
    1119             : 
    1120             :   // check if MOLINFO line is present
    1121          30 :   if( moldat ) {
    1122          30 :     log<<"  MOLINFO DATA found with label " <<moldat->getLabel()<<", using proper atom names\n";
    1123       16906 :     for(unsigned i=0; i<atoms.size(); ++i) {
    1124             :       // get atom name
    1125       16876 :       std::string name = moldat->getAtomName(atoms[i]);
    1126             :       char type;
    1127             :       // get atom type
    1128       16876 :       char first = name.at(0);
    1129             :       // GOLDEN RULE: type is first letter, if not a number
    1130       16876 :       if (!isdigit(first)) {
    1131             :         type = first;
    1132             :         // otherwise is the second
    1133             :       } else {
    1134           0 :         type = name.at(1);
    1135             :       }
    1136             :       // check if key in map
    1137       16876 :       std::string type_s = std::string(1,type);
    1138       16876 :       if(type_map.find(type_s) != type_map.end()) {
    1139             :         // save atom type
    1140       16876 :         GMM_m_type_.push_back(type_map[type_s]);
    1141             :         // this will be normalized in the final density
    1142       16876 :         GMM_m_w.push_back(GMM_m_w_[type_map[type_s]]);
    1143             :       } else {
    1144           0 :         error("Wrong atom type "+type_s+" from atom name "+name+"\n");
    1145             :       }
    1146             :     }
    1147             :   } else {
    1148           0 :     error("MOLINFO DATA not found\n");
    1149             :   }
    1150          30 :   return GMM_m_w;
    1151             : }
    1152             : 
    1153        2052 : void EMMI::check_GMM_d(const VectorGeneric<6> &cov, double w) {
    1154             : 
    1155             : // check if positive defined, by calculating the 3 leading principal minors
    1156        2052 :   double pm1 = cov[0];
    1157        2052 :   double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
    1158        2052 :   double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
    1159             : // apply Sylvester’s criterion
    1160        2052 :   if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0) {
    1161           0 :     error("check data GMM: covariance matrix is not positive defined");
    1162             :   }
    1163             : 
    1164             : // check if weight is positive
    1165        2052 :   if(w<=0) {
    1166           0 :     error("check data GMM: weight must be positive");
    1167             :   }
    1168        2052 : }
    1169             : 
    1170             : // read GMM data file in PLUMED format:
    1171          30 : void EMMI::get_GMM_d(const std::string & GMM_file) {
    1172          30 :   VectorGeneric<6> cov;
    1173             : 
    1174             : // open file
    1175             :   auto ifile=Tools::make_unique<IFile>();
    1176          30 :   if(ifile->FileExist(GMM_file)) {
    1177          30 :     ifile->open(GMM_file);
    1178             :     int idcomp;
    1179        4164 :     while(ifile->scanField("Id",idcomp)) {
    1180             :       int beta;
    1181             :       double w, m0, m1, m2;
    1182        4104 :       ifile->scanField("Weight",w);
    1183        4104 :       ifile->scanField("Mean_0",m0);
    1184        4104 :       ifile->scanField("Mean_1",m1);
    1185        4104 :       ifile->scanField("Mean_2",m2);
    1186        4104 :       ifile->scanField("Cov_00",cov[0]);
    1187        4104 :       ifile->scanField("Cov_01",cov[1]);
    1188        4104 :       ifile->scanField("Cov_02",cov[2]);
    1189        4104 :       ifile->scanField("Cov_11",cov[3]);
    1190        4104 :       ifile->scanField("Cov_12",cov[4]);
    1191        4104 :       ifile->scanField("Cov_22",cov[5]);
    1192        2052 :       ifile->scanField("Beta",beta);
    1193             :       // check input
    1194        2052 :       check_GMM_d(cov, w);
    1195             :       // check beta
    1196        2052 :       if(beta<0) {
    1197           0 :         error("Beta must be positive!");
    1198             :       }
    1199             :       // center of the Gaussian
    1200        2052 :       GMM_d_m_.push_back(Vector(m0,m1,m2));
    1201             :       // covariance matrix
    1202        2052 :       GMM_d_cov_.push_back(cov);
    1203             :       // weight
    1204        2052 :       GMM_d_w_.push_back(w);
    1205             :       // beta
    1206        2052 :       GMM_d_beta_.push_back(beta);
    1207             :       // new line
    1208        2052 :       ifile->scanField();
    1209             :     }
    1210             :   } else {
    1211           0 :     error("Cannot find GMM_FILE "+GMM_file+"\n");
    1212             :   }
    1213             :   // now create a set from beta (unique set of values)
    1214          30 :   std::set<int> bu(GMM_d_beta_.begin(), GMM_d_beta_.end());
    1215             :   // now prepare the group std::vector
    1216          30 :   GMM_d_grps_.resize(bu.size());
    1217             :   // and fill it in
    1218        2082 :   for(unsigned i=0; i<GMM_d_beta_.size(); ++i) {
    1219        2052 :     if(GMM_d_beta_[i]>=GMM_d_grps_.size()) {
    1220           0 :       error("Check Beta values");
    1221             :     }
    1222        2052 :     GMM_d_grps_[GMM_d_beta_[i]].push_back(i);
    1223             :   }
    1224          30 : }
    1225             : 
    1226          30 : void EMMI::calculate_useful_stuff(double reso) {
    1227             :   // We use the following definition for resolution:
    1228             :   // the Fourier transform of the density distribution in real space
    1229             :   // f(s) falls to 1/e of its maximum value at wavenumber 1/resolution
    1230             :   // i.e. from f(s) = A * exp(-B*s**2) -> Res = std::sqrt(B).
    1231             :   // average value of B
    1232             :   double Bave = 0.0;
    1233       16906 :   for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
    1234       16876 :     Bave += GMM_m_s_[GMM_m_type_[i]];
    1235             :   }
    1236          30 :   Bave /= static_cast<double>(GMM_m_type_.size());
    1237             :   // calculate blur factor
    1238             :   double blur = 0.0;
    1239          30 :   if(reso*reso>Bave) {
    1240           2 :     blur = reso*reso-Bave;
    1241             :   } else {
    1242          56 :     warning("PLUMED should not be used with maps at resolution better than 0.3 nm");
    1243             :   }
    1244             :   // add blur to B
    1245         150 :   for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
    1246         120 :     GMM_m_s_[i] += blur;
    1247             :   }
    1248             :   // calculate average resolution
    1249             :   double ave_res = 0.0;
    1250       16906 :   for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
    1251       16876 :     ave_res += std::sqrt(GMM_m_s_[GMM_m_type_[i]]);
    1252             :   }
    1253          30 :   ave_res = ave_res / static_cast<double>(GMM_m_type_.size());
    1254          30 :   log.printf("  experimental map resolution : %3.2f\n", reso);
    1255          30 :   log.printf("  predicted map resolution : %3.2f\n", ave_res);
    1256          30 :   log.printf("  blur factor : %f\n", blur);
    1257             :   // now calculate useful stuff
    1258          30 :   VectorGeneric<6> cov, sum, inv_sum;
    1259             :   // cycle on all atoms types (4 for the moment)
    1260         150 :   for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
    1261             :     // the Gaussian in density (real) space is the FT of scattering factor
    1262             :     // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
    1263         120 :     double s = std::sqrt ( 0.5 * GMM_m_s_[i] ) / pi;
    1264             :     // covariance matrix for spherical Gaussian
    1265         120 :     cov[0]=s*s;
    1266         120 :     cov[1]=0.0;
    1267         120 :     cov[2]=0.0;
    1268         120 :     cov[3]=s*s;
    1269         120 :     cov[4]=0.0;
    1270         120 :     cov[5]=s*s;
    1271             :     // cycle on all data GMM
    1272        8328 :     for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
    1273             :       // we need the sum of the covariance matrices
    1274       57456 :       for(unsigned k=0; k<6; ++k) {
    1275       49248 :         sum[k] = cov[k] + GMM_d_cov_[j][k];
    1276             :       }
    1277             :       // and to calculate its determinant
    1278        8208 :       double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
    1279        8208 :       det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
    1280        8208 :       det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
    1281             :       // calculate prefactor - model weights are already normalized
    1282        8208 :       double pre_fact =  cfact_ / std::sqrt(det) * GMM_d_w_[j] * GMM_m_w_[i];
    1283             :       // and its inverse
    1284        8208 :       inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
    1285        8208 :       inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
    1286        8208 :       inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
    1287        8208 :       inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
    1288        8208 :       inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
    1289        8208 :       inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
    1290             :       // now we store the prefactor
    1291        8208 :       pre_fact_.push_back(pre_fact);
    1292             :       // and the inverse of the sum
    1293        8208 :       inv_cov_md_.push_back(inv_sum);
    1294             :     }
    1295             :   }
    1296             :   // tabulate exponential
    1297          30 :   dexp_ = dpcutoff_ / static_cast<double> (nexp_-1);
    1298    30000030 :   for(unsigned i=0; i<nexp_; ++i) {
    1299    30000000 :     tab_exp_.push_back(std::exp(-static_cast<double>(i) * dexp_));
    1300             :   }
    1301          30 : }
    1302             : 
    1303             : // get prefactors
    1304     1622268 : double EMMI::get_prefactor_inverse
    1305             : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
    1306             :  double GMM_w_0, double GMM_w_1,
    1307             :  VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum) {
    1308             : // we need the sum of the covariance matrices
    1309    11355876 :   for(unsigned k=0; k<6; ++k) {
    1310     9733608 :     sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
    1311             :   }
    1312             : 
    1313             : // and to calculate its determinant
    1314     1622268 :   double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
    1315     1622268 :   det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
    1316     1622268 :   det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
    1317             : 
    1318             : // the prefactor is
    1319     1622268 :   double pre_fact =  cfact_ / std::sqrt(det) * GMM_w_0 * GMM_w_1;
    1320             : 
    1321             : // and its inverse
    1322     1622268 :   inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
    1323     1622268 :   inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
    1324     1622268 :   inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
    1325     1622268 :   inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
    1326     1622268 :   inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
    1327     1622268 :   inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
    1328             : 
    1329             : // return pre-factor
    1330     1622268 :   return pre_fact;
    1331             : }
    1332             : 
    1333        2052 : double EMMI::get_self_overlap(unsigned id) {
    1334             :   double ov_tot = 0.0;
    1335        2052 :   VectorGeneric<6> sum, inv_sum;
    1336        2052 :   Vector ov_der;
    1337             : // start loop
    1338     1624320 :   for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
    1339             :     // call auxiliary method
    1340     1622268 :     double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
    1341     1622268 :                                             GMM_d_w_[id],   GMM_d_w_[i], sum, inv_sum);
    1342             :     // add overlap to ov_tot
    1343     1622268 :     ov_tot += get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
    1344             :   }
    1345             : // and return it
    1346        2052 :   return ov_tot;
    1347             : }
    1348             : 
    1349             : // get overlap and derivatives
    1350     3737255 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
    1351             :                          const VectorGeneric<6> &inv_cov_md, Vector &ov_der) {
    1352     3737255 :   Vector md;
    1353             :   // calculate std::vector difference m_m-d_m with/without pbc
    1354     3737255 :   if(pbc_) {
    1355           0 :     md = pbcDistance(d_m, m_m);
    1356             :   } else {
    1357     3737255 :     md = delta(d_m, m_m);
    1358             :   }
    1359             :   // calculate product of transpose of md and inv_cov_md
    1360     3737255 :   double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
    1361     3737255 :   double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
    1362     3737255 :   double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
    1363             :   // calculate product of prod and md
    1364     3737255 :   double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
    1365             :   // final calculation
    1366     3737255 :   ov = pre_fact * std::exp(-0.5*ov);
    1367             :   // derivatives
    1368     3737255 :   ov_der = ov * Vector(p_x, p_y, p_z);
    1369     3737255 :   return ov;
    1370             : }
    1371             : 
    1372             : // get the exponent of the overlap
    1373    59106708 : double EMMI::get_exp_overlap(const Vector &m_m, const Vector &d_m,
    1374             :                              const VectorGeneric<6> &inv_cov_md) {
    1375    59106708 :   Vector md;
    1376             :   // calculate std::vector difference m_m-d_m with/without pbc
    1377    59106708 :   if(pbc_) {
    1378           0 :     md = pbcDistance(d_m, m_m);
    1379             :   } else {
    1380    59106708 :     md = delta(d_m, m_m);
    1381             :   }
    1382             :   // calculate product of transpose of md and inv_cov_md
    1383    59106708 :   double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
    1384    59106708 :   double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
    1385    59106708 :   double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
    1386             :   // calculate product of prod and md
    1387    59106708 :   double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
    1388    59106708 :   return ov;
    1389             : }
    1390             : 
    1391       18180 : void EMMI::update_neighbor_list() {
    1392             :   // dimension of GMM and atom std::vectors
    1393       18180 :   unsigned GMM_d_size = GMM_d_m_.size();
    1394       18180 :   unsigned GMM_m_size = GMM_m_type_.size();
    1395             :   // local neighbor list
    1396             :   std::vector < unsigned > nl_l;
    1397             :   // clear old neighbor list
    1398       18180 :   nl_.clear();
    1399             : 
    1400             :   // cycle on GMM components - in parallel
    1401      118134 :   for(unsigned id=rank_; id<GMM_d_size; id+=size_) {
    1402             :     // overlap lists and map
    1403             :     std::vector<double> ov_l;
    1404             :     std::map<double, unsigned> ov_m;
    1405             :     // total overlap with id
    1406             :     double ov_tot = 0.0;
    1407             :     // cycle on all atoms
    1408    59206662 :     for(unsigned im=0; im<GMM_m_size; ++im) {
    1409             :       // get index in auxiliary lists
    1410    59106708 :       unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
    1411             :       // calculate exponent of overlap
    1412    59106708 :       double expov = get_exp_overlap(GMM_d_m_[id], getPosition(im), inv_cov_md_[kaux]);
    1413             :       // get index of 0.5*expov in tabulated exponential
    1414    59106708 :       unsigned itab = static_cast<unsigned> (round( 0.5*expov/dexp_ ));
    1415             :       // check boundaries and skip atom in case
    1416    59106708 :       if(itab >= tab_exp_.size()) {
    1417    54971808 :         continue;
    1418             :       }
    1419             :       // in case calculate overlap
    1420     4134900 :       double ov = pre_fact_[kaux] * tab_exp_[itab];
    1421             :       // add to list
    1422     4134900 :       ov_l.push_back(ov);
    1423             :       // and map to retrieve atom index
    1424     4134900 :       ov_m[ov] = im;
    1425             :       // increase ov_tot
    1426     4134900 :       ov_tot += ov;
    1427             :     }
    1428             :     // check if zero size -> ov_tot = 0
    1429       99954 :     if(ov_l.size()==0) {
    1430             :       continue;
    1431             :     }
    1432             :     // define cutoff
    1433       99878 :     double ov_cut = ov_tot * nl_cutoff_;
    1434             :     // sort ov_l in ascending order
    1435       99878 :     std::sort(ov_l.begin(), ov_l.end());
    1436             :     // integrate ov_l
    1437             :     double res = 0.0;
    1438     2146886 :     for(unsigned i=0; i<ov_l.size(); ++i) {
    1439     2146886 :       res += ov_l[i];
    1440             :       // if exceeding the cutoff for overlap, stop
    1441     2146886 :       if(res >= ov_cut) {
    1442             :         break;
    1443             :       } else {
    1444             :         ov_m.erase(ov_l[i]);
    1445             :       }
    1446             :     }
    1447             :     // now add atoms to neighborlist
    1448     2187770 :     for(std::map<double, unsigned>::iterator it=ov_m.begin(); it!=ov_m.end(); ++it) {
    1449     2087892 :       nl_l.push_back(id*GMM_m_size+it->second);
    1450             :     }
    1451             :     // end cycle on GMM components in parallel
    1452             :   }
    1453             :   // find total dimension of neighborlist
    1454       18180 :   std::vector <int> recvcounts(size_, 0);
    1455       18180 :   recvcounts[rank_] = nl_l.size();
    1456       18180 :   comm.Sum(&recvcounts[0], size_);
    1457             :   int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
    1458             :   // resize neighbor stuff
    1459       18180 :   nl_.resize(tot_size);
    1460             :   // calculate std::vector of displacement
    1461       18180 :   std::vector<int> disp(size_);
    1462       18180 :   disp[0] = 0;
    1463             :   int rank_size = 0;
    1464       32724 :   for(unsigned i=0; i<size_-1; ++i) {
    1465       14544 :     rank_size += recvcounts[i];
    1466       14544 :     disp[i+1] = rank_size;
    1467             :   }
    1468             :   // Allgather neighbor list
    1469       18180 :   comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
    1470             :   // now resize derivatives
    1471       18180 :   ovmd_der_.resize(tot_size);
    1472       18180 : }
    1473             : 
    1474          70 : void EMMI::prepare() {
    1475          70 :   if(getExchangeStep()) {
    1476           0 :     first_time_=true;
    1477             :   }
    1478          70 : }
    1479             : 
    1480             : // overlap calculator
    1481       18220 : void EMMI::calculate_overlap() {
    1482             : 
    1483       18220 :   if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
    1484       18180 :     update_neighbor_list();
    1485       18180 :     first_time_=false;
    1486             :   }
    1487             : 
    1488             :   // clean temporary std::vectors
    1489      192892 :   for(unsigned i=0; i<ovmd_.size(); ++i) {
    1490      174672 :     ovmd_[i] = 0.0;
    1491             :   }
    1492     3525024 :   for(unsigned i=0; i<ovmd_der_.size(); ++i) {
    1493     3506804 :     ovmd_der_[i] = Vector(0,0,0);
    1494             :   }
    1495             : 
    1496             :   // we have to cycle over all model and data GMM components in the neighbor list
    1497       18220 :   unsigned GMM_d_size = GMM_d_m_.size();
    1498       18220 :   unsigned GMM_m_size = GMM_m_type_.size();
    1499     2133207 :   for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
    1500             :     // get data (id) and atom (im) indexes
    1501     2114987 :     unsigned id = nl_[i] / GMM_m_size;
    1502     2114987 :     unsigned im = nl_[i] % GMM_m_size;
    1503             :     // get index in auxiliary lists
    1504     2114987 :     unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
    1505             :     // add overlap with im component of model GMM
    1506     2114987 :     ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact_[kaux],
    1507     2114987 :                              inv_cov_md_[kaux], ovmd_der_[i]);
    1508             :   }
    1509             :   // communicate stuff
    1510       18220 :   if(size_>1) {
    1511       14574 :     comm.Sum(&ovmd_[0], ovmd_.size());
    1512       14574 :     comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
    1513             :   }
    1514       18220 : }
    1515             : 
    1516           0 : double EMMI::scaleEnergy(double s) {
    1517             :   double ene = 0.0;
    1518           0 :   for(unsigned i=0; i<ovdd_.size(); ++i) {
    1519           0 :     ene += std::log( abs ( s * ovmd_ave_[i] - ovdd_[i] ) );
    1520             :   }
    1521           0 :   return ene;
    1522             : }
    1523             : 
    1524           0 : double EMMI::doRegression() {
    1525             : // standard MC parameters
    1526             :   unsigned MCsteps = 100000;
    1527             :   double kbtmin = 1.0;
    1528             :   double kbtmax = 10.0;
    1529             :   unsigned ncold = 5000;
    1530             :   unsigned nhot = 2000;
    1531             :   double MCacc = 0.0;
    1532             :   double kbt, ebest, scale_best;
    1533             : 
    1534             : // initial value of scale factor and energy
    1535           0 :   double scale = random_.RandU01() * ( scale_max_ - scale_min_ ) + scale_min_;
    1536           0 :   double ene = scaleEnergy(scale);
    1537             : // set best energy
    1538           0 :   ebest = ene;
    1539             : 
    1540             : // MC loop
    1541           0 :   for(unsigned istep=0; istep<MCsteps; ++istep) {
    1542             :     // get temperature
    1543           0 :     if(istep%(ncold+nhot)<ncold) {
    1544             :       kbt = kbtmin;
    1545             :     } else {
    1546             :       kbt = kbtmax;
    1547             :     }
    1548             :     // propose move in scale
    1549           0 :     double ds = dscale_ * ( 2.0 * random_.RandU01() - 1.0 );
    1550           0 :     double new_scale = scale + ds;
    1551             :     // check boundaries
    1552           0 :     if(new_scale > scale_max_) {
    1553           0 :       new_scale = 2.0 * scale_max_ - new_scale;
    1554             :     }
    1555           0 :     if(new_scale < scale_min_) {
    1556           0 :       new_scale = 2.0 * scale_min_ - new_scale;
    1557             :     }
    1558             :     // new energy
    1559           0 :     double new_ene = scaleEnergy(new_scale);
    1560             :     // accept or reject
    1561           0 :     bool accept = doAccept(ene, new_ene, kbt);
    1562             :     // in case of acceptance
    1563           0 :     if(accept) {
    1564             :       scale = new_scale;
    1565             :       ene = new_ene;
    1566           0 :       MCacc += 1.0;
    1567             :     }
    1568             :     // save best
    1569           0 :     if(ene<ebest) {
    1570           0 :       ebest = ene;
    1571           0 :       scale_best = scale;
    1572             :     }
    1573             :   }
    1574             : // calculate acceptance
    1575           0 :   double accscale = MCacc / static_cast<double>(MCsteps);
    1576             : // global communication
    1577           0 :   if(!no_aver_ && nrep_>1) {
    1578           0 :     if(replica_!=0) {
    1579           0 :       scale_best = 0.0;
    1580           0 :       ebest = 0.0;
    1581           0 :       accscale = 0.0;
    1582             :     }
    1583           0 :     if(rank_==0) {
    1584           0 :       multi_sim_comm.Sum(&scale_best, 1);
    1585           0 :       multi_sim_comm.Sum(&ebest, 1);
    1586           0 :       multi_sim_comm.Sum(&accscale, 1);
    1587             :     }
    1588             :   }
    1589             :   // local communication
    1590           0 :   if(rank_!=0) {
    1591           0 :     scale_best = 0.0;
    1592           0 :     ebest = 0.0;
    1593           0 :     accscale = 0.0;
    1594             :   }
    1595           0 :   if(size_>1) {
    1596           0 :     comm.Sum(&scale_best, 1);
    1597           0 :     comm.Sum(&ebest, 1);
    1598           0 :     comm.Sum(&accscale, 1);
    1599             :   }
    1600             : // set scale parameters
    1601           0 :   getPntrToComponent("accscale")->set(accscale);
    1602           0 :   getPntrToComponent("enescale")->set(ebest);
    1603             : // return scale value
    1604           0 :   return scale_best;
    1605             : }
    1606             : 
    1607          20 : double EMMI::get_annealing(long long int step) {
    1608             : // default no annealing
    1609             :   double fact = 1.0;
    1610             : // position in annealing cycle
    1611          20 :   unsigned nc = step%(4*nanneal_);
    1612             : // useful doubles
    1613          20 :   double ncd = static_cast<double>(nc);
    1614          20 :   double nn  = static_cast<double>(nanneal_);
    1615             : // set fact
    1616          20 :   if(nc>=nanneal_   && nc<2*nanneal_) {
    1617           6 :     fact = (kanneal_-1.0) / nn * ( ncd - nn ) + 1.0;
    1618             :   }
    1619          20 :   if(nc>=2*nanneal_ && nc<3*nanneal_) {
    1620           4 :     fact = kanneal_;
    1621             :   }
    1622          20 :   if(nc>=3*nanneal_) {
    1623           2 :     fact = (1.0-kanneal_) / nn * ( ncd - 3.0*nn) + kanneal_;
    1624             :   }
    1625          20 :   return fact;
    1626             : }
    1627             : 
    1628       18220 : void EMMI::get_weights(double &weight, double &norm, double &neff) {
    1629       18220 :   const double dnrep = static_cast<double>(nrep_);
    1630             :   // calculate the weights either from BIAS
    1631       18220 :   if(do_reweight_) {
    1632          12 :     std::vector<double> bias(nrep_,0);
    1633          12 :     if(rank_==0) {
    1634          12 :       bias[replica_] = getArgument(0);
    1635          12 :       if(nrep_>1) {
    1636          12 :         multi_sim_comm.Sum(&bias[0], nrep_);
    1637             :       }
    1638             :     }
    1639          12 :     comm.Sum(&bias[0], nrep_);
    1640             : 
    1641             :     // accumulate weights
    1642          12 :     if(!first_time_w_) {
    1643          30 :       for(unsigned i=0; i<nrep_; ++i) {
    1644          20 :         const double delta=bias[i]-average_weights_[i];
    1645             :         // FIXME: multiplying by fractional decay here causes problems with numerical derivatives,
    1646             :         // probably because we're making several calls to calculate(), causing accumulation
    1647             :         // of epsilons. Maybe we can work on a temporary copy of the action instead?
    1648          20 :         average_weights_[i]+=decay_w_*delta;
    1649             :       }
    1650             :     } else {
    1651           2 :       first_time_w_ = false;
    1652           6 :       for(unsigned i=0; i<nrep_; ++i) {
    1653           4 :         average_weights_[i] = bias[i];
    1654             :       }
    1655             :     }
    1656             : 
    1657             :     // set average back into bias and set norm to one
    1658          12 :     const double maxbias = *(std::max_element(average_weights_.begin(), average_weights_.end()));
    1659          36 :     for(unsigned i=0; i<nrep_; ++i) {
    1660          24 :       bias[i] = std::exp((average_weights_[i]-maxbias)/kbt_);
    1661             :     }
    1662             :     // set local weight, norm and weight variance
    1663          12 :     weight = bias[replica_];
    1664             :     double w2=0.;
    1665          36 :     for(unsigned i=0; i<nrep_; ++i) {
    1666          24 :       w2 += bias[i]*bias[i];
    1667          24 :       norm += bias[i];
    1668             :     }
    1669          12 :     neff = norm*norm/w2;
    1670          24 :     getPntrToComponent("weight")->set(weight/norm);
    1671             :   } else {
    1672             :     // or arithmetic ones
    1673       18208 :     neff = dnrep;
    1674       18208 :     weight = 1.0;
    1675       18208 :     norm = dnrep;
    1676             :   }
    1677       18220 :   getPntrToComponent("neff")->set(neff);
    1678       18220 : }
    1679             : 
    1680       18220 : void EMMI::calculate() {
    1681             : 
    1682             : // calculate CV
    1683       18220 :   calculate_overlap();
    1684             : 
    1685             :   // rescale factor for ensemble average
    1686       18220 :   double weight = 0.;
    1687       18220 :   double neff = 0.;
    1688       18220 :   double norm = 0.;
    1689       18220 :   get_weights(weight, norm, neff);
    1690             : 
    1691             :   // in case of ensemble averaging, calculate average overlap
    1692       18220 :   if(!no_aver_ && nrep_>1) {
    1693             :     // if master node, calculate average across replicas
    1694          12 :     if(rank_==0) {
    1695       10812 :       for(unsigned i=0; i<ovmd_.size(); ++i) {
    1696       10800 :         ovmd_ave_[i] = weight / norm * ovmd_[i];
    1697             :       }
    1698          12 :       multi_sim_comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
    1699             :     } else {
    1700           0 :       for(unsigned i=0; i<ovmd_ave_.size(); ++i) {
    1701           0 :         ovmd_ave_[i] = 0.0;
    1702             :       }
    1703             :     }
    1704             :     // local communication
    1705          12 :     if(size_>1) {
    1706           0 :       comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
    1707             :     }
    1708             :   } else {
    1709      182080 :     for(unsigned i=0; i<ovmd_.size(); ++i) {
    1710      163872 :       ovmd_ave_[i] = ovmd_[i];
    1711             :     }
    1712             :   }
    1713             : 
    1714             :   // get time step
    1715       18220 :   long long int step = getStep();
    1716             : 
    1717             :   // do regression
    1718       18220 :   if(nregres_>0) {
    1719           0 :     if(step%nregres_==0 && !getExchangeStep()) {
    1720           0 :       scale_ = doRegression();
    1721             :     }
    1722             :     // set scale component
    1723           0 :     getPntrToComponent("scale")->set(scale_);
    1724             :   }
    1725             : 
    1726             :   // write model overlap to file
    1727       18220 :   if(ovstride_>0 && step%ovstride_==0) {
    1728           4 :     write_model_overlap(step);
    1729             :   }
    1730             : 
    1731             :   // clear energy and virial
    1732       18220 :   ene_ = 0.0;
    1733       18220 :   virial_.zero();
    1734             : 
    1735             :   // Gaussian noise
    1736       18220 :   if(noise_==0) {
    1737        7318 :     calculate_Gauss();
    1738             :   }
    1739             : 
    1740             :   // Outliers noise
    1741       18220 :   if(noise_==1) {
    1742        5451 :     calculate_Outliers();
    1743             :   }
    1744             : 
    1745             :   // Marginal noise
    1746       18220 :   if(noise_==2) {
    1747        5451 :     calculate_Marginal();
    1748             :   }
    1749             : 
    1750             :   // get annealing rescale factor
    1751       18220 :   if(nanneal_>0) {
    1752          20 :     anneal_ = get_annealing(step);
    1753          40 :     getPntrToComponent("anneal")->set(anneal_);
    1754             :   }
    1755             : 
    1756             :   // annealing rescale
    1757       18220 :   ene_ /= anneal_;
    1758             : 
    1759       18220 :   std::vector<double> GMMid_der_av_(GMMid_der_.size(), 0.0);
    1760             :   // in case of ensemble averaging
    1761       18220 :   if(!no_aver_ && nrep_>1) {
    1762             :     // if master node, sum der_GMMid derivatives and ene
    1763          12 :     if(rank_==0) {
    1764       10812 :       for(unsigned i=0; i<GMMid_der_.size(); ++i) {
    1765       10800 :         GMMid_der_av_[i] = (weight / norm) * GMMid_der_[i];
    1766             :       }
    1767          12 :       multi_sim_comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
    1768          12 :       multi_sim_comm.Sum(&ene_, 1);
    1769             :     } else {
    1770             :       // set der_GMMid derivatives and energy to zero
    1771           0 :       for(unsigned i=0; i<GMMid_der_av_.size(); ++i) {
    1772           0 :         GMMid_der_av_[i]=0.0;
    1773             :       }
    1774           0 :       ene_ = 0.0;
    1775             :     }
    1776             :     // local communication
    1777          12 :     if(size_>1) {
    1778           0 :       comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
    1779           0 :       comm.Sum(&ene_, 1);
    1780             :     }
    1781             :   } else {
    1782      182080 :     for (unsigned i = 0; i < GMMid_der_.size(); ++i) {
    1783      163872 :       GMMid_der_av_[i] = GMMid_der_[i];
    1784             :     }
    1785             :   }
    1786             : 
    1787             :   // clean temporary std::vector
    1788    10979556 :   for(unsigned i=0; i<atom_der_.size(); ++i) {
    1789    10961336 :     atom_der_[i] = Vector(0,0,0);
    1790             :   }
    1791             : 
    1792             :   // get derivatives of bias with respect to atoms
    1793     2133207 :   for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
    1794             :     // get indexes of data and model component
    1795     2114987 :     unsigned id = nl_[i] / GMM_m_type_.size();
    1796     2114987 :     unsigned im = nl_[i] % GMM_m_type_.size();
    1797             :     // chain rule + replica normalization
    1798     2114987 :     Vector tot_der = GMMid_der_av_[id] * ovmd_der_[i] * scale_ / anneal_;
    1799     2114987 :     Vector pos;
    1800     2114987 :     if(pbc_) {
    1801           0 :       pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
    1802             :     } else {
    1803     2114987 :       pos = getPosition(im);
    1804             :     }
    1805             :     // increment derivatives and virial
    1806     2114987 :     atom_der_[im] += tot_der;
    1807     2114987 :     virial_ += Tensor(pos, -tot_der);
    1808             :   }
    1809             : 
    1810             :   // communicate local derivatives and virial
    1811       18220 :   if(size_>1) {
    1812       14574 :     comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
    1813       14574 :     comm.Sum(virial_);
    1814             :   }
    1815             : 
    1816             :   // set derivatives, virial, and score
    1817    10979556 :   for(unsigned i=0; i<atom_der_.size(); ++i) {
    1818    21922672 :     setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_[i]);
    1819             :   }
    1820       18220 :   setBoxDerivatives(getPntrToComponent("scoreb"), virial_);
    1821       18220 :   getPntrToComponent("scoreb")->set(ene_);
    1822             : 
    1823       18220 :   if (do_reweight_) {
    1824             :     double w_tmp = 0.;
    1825       10812 :     for (unsigned i = 0; i < ovmd_.size(); ++i) {
    1826       10800 :       w_tmp += (ovmd_[i] - ovmd_ave_[i]) * GMMid_der_[i];
    1827             :     }
    1828          12 :     w_tmp *= scale_ * (weight / norm) / kbt_ * decay_w_;
    1829             : 
    1830          12 :     setArgDerivatives(getPntrToComponent("scoreb"), w_tmp);
    1831          24 :     getPntrToComponent("biasDer")->set(w_tmp);
    1832             :   }
    1833             : 
    1834             :   // This part is needed only for Gaussian and Outliers noise models
    1835       18220 :   if(noise_!=2) {
    1836             : 
    1837             :     // do Montecarlo
    1838       12769 :     if(dsigma_[0]>0 && step%MCstride_==0 && !getExchangeStep()) {
    1839           0 :       doMonteCarlo();
    1840             :     }
    1841             : 
    1842             :     // print status
    1843       12769 :     if(step%statusstride_==0) {
    1844       12729 :       print_status(step);
    1845             :     }
    1846             : 
    1847             :     // calculate acceptance ratio
    1848       12769 :     double acc = MCaccept_ / MCtrials_;
    1849             : 
    1850             :     // set value
    1851       25538 :     getPntrToComponent("acc")->set(acc);
    1852             : 
    1853             :   }
    1854             : 
    1855       18220 : }
    1856             : 
    1857        7318 : void EMMI::calculate_Gauss() {
    1858             :   // cycle on all the GMM groups
    1859       14636 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1860             :     double eneg = 0.0;
    1861             :     // cycle on all the members of the group
    1862       83872 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1863             :       // id of the GMM component
    1864       76554 :       int GMMid = GMM_d_grps_[i][j];
    1865             :       // calculate deviation
    1866       76554 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
    1867             :       // add to group energy
    1868       76554 :       eneg += 0.5 * dev * dev;
    1869             :       // store derivative for later
    1870       76554 :       GMMid_der_[GMMid] = kbt_ * dev / sigma_[i];
    1871             :     }
    1872             :     // add to total energy along with normalizations and prior
    1873        7318 :     ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
    1874             :   }
    1875        7318 : }
    1876             : 
    1877        5451 : void EMMI::calculate_Outliers() {
    1878             :   // cycle on all the GMM groups
    1879       10902 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1880             :     // cycle on all the members of the group
    1881             :     double eneg = 0.0;
    1882       54510 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1883             :       // id of the GMM component
    1884       49059 :       int GMMid = GMM_d_grps_[i][j];
    1885             :       // calculate deviation
    1886       49059 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
    1887             :       // add to group energy
    1888       49059 :       eneg += std::log1p( 0.5 * dev * dev );
    1889             :       // store derivative for later
    1890       49059 :       GMMid_der_[GMMid] = kbt_ / ( 1.0 + 0.5 * dev * dev ) * dev / sigma_[i];
    1891             :     }
    1892             :     // add to total energy along with normalizations and prior
    1893        5451 :     ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
    1894             :   }
    1895        5451 : }
    1896             : 
    1897        5451 : void EMMI::calculate_Marginal() {
    1898             :   // cycle on all the GMM groups
    1899       10902 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1900             :     // cycle on all the members of the group
    1901       54510 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1902             :       // id of the GMM component
    1903       49059 :       int GMMid = GMM_d_grps_[i][j];
    1904             :       // calculate deviation
    1905       49059 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
    1906             :       // calculate errf
    1907       49059 :       double errf = erf ( dev * inv_sqrt2_ / sigma_min_[i] );
    1908             :       // add to group energy
    1909       49059 :       ene_ += -kbt_ * std::log ( 0.5 / dev * errf ) ;
    1910             :       // store derivative for later
    1911       49059 :       GMMid_der_[GMMid] = - kbt_/errf*sqrt2_pi_*std::exp(-0.5*dev*dev/sigma_min_[i]/sigma_min_[i])/sigma_min_[i]+kbt_/dev;
    1912             :     }
    1913             :   }
    1914        5451 : }
    1915             : 
    1916             : }
    1917             : }

Generated by: LCOV version 1.16