LCOV - code coverage report
Current view: top level - isdb - EMMI.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 588 778 75.6 %
Date: 2024-10-18 13:59:31 Functions: 29 39 74.4 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2017-2023 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : #include "core/ActionWithValue.h"
      23             : #include "core/ActionAtomistic.h"
      24             : #include "core/ActionWithArguments.h"
      25             : #include "core/ActionRegister.h"
      26             : #include "core/PlumedMain.h"
      27             : #include "tools/Matrix.h"
      28             : #include "tools/Communicator.h"
      29             : #include "core/GenericMolInfo.h"
      30             : #include "core/ActionSet.h"
      31             : #include "tools/File.h"
      32             : #include "tools/Random.h"
      33             : 
      34             : #include <string>
      35             : #include <map>
      36             : #include <numeric>
      37             : #include <ctime>
      38             : 
      39             : namespace PLMD {
      40             : namespace isdb {
      41             : 
      42             : //+PLUMEDOC ISDB_COLVAR EMMI
      43             : /*
      44             : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
      45             : 
      46             : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in  Ref. \cite Hanot113951 .
      47             : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
      48             : 
      49             : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
      50             : GMM_FILE. We are currently working on a web server to perform
      51             : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
      52             : 
      53             : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
      54             : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using
      55             : the Metainference approach \cite Bonomi:2016ip .
      56             : 
      57             : \warning
      58             : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
      59             : 
      60             : \note
      61             : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
      62             : In this case, the user should add the NO_AVER flag to the input line. To use a replica-based enhanced sampling scheme such as
      63             : Parallel-Bias Metadynamics (\ref PBMETAD), one should use the REWEIGHT flag and pass the Metadynamics bias using the ARG keyword.
      64             : 
      65             : \note
      66             : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
      67             : add the NOPBC flag to the input line
      68             : 
      69             : \par Examples
      70             : 
      71             : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
      72             : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
      73             : 
      74             : \plumedfile
      75             : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
      76             :      0  2.9993805e+01   6.54628 10.37820 -0.92988  2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02  1
      77             :      1  2.3468312e+01   6.56095 10.34790 -0.87808  1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02  1
      78             :      ...
      79             : \endplumedfile
      80             : 
      81             : To accelerate the computation of the Bayesian score, one can:
      82             : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
      83             : - calculate the restraint every other step (or more).
      84             : 
      85             : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
      86             : using a GROMACS index file.
      87             : 
      88             : The input file looks as follows:
      89             : 
      90             : \plumedfile
      91             : # include pdb info
      92             : MOLINFO STRUCTURE=prot.pdb
      93             : 
      94             : #  all heavy atoms
      95             : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
      96             : 
      97             : # create EMMI score
      98             : gmm: EMMI NOPBC SIGMA_MIN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
      99             : 
     100             : # translate into bias - apply every 2 steps
     101             : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
     102             : 
     103             : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
     104             : \endplumedfile
     105             : 
     106             : 
     107             : */
     108             : //+ENDPLUMEDOC
     109             : 
     110             : class EMMI :
     111             :   public ActionAtomistic,
     112             :   public ActionWithArguments,
     113             :   public ActionWithValue
     114             : {
     115             : private:
     116             : 
     117             : // temperature in kbt
     118             :   double kbt_;
     119             : // model GMM - atom types
     120             :   std::vector<unsigned> GMM_m_type_;
     121             : // model GMM - list of atom sigmas - one per atom type
     122             :   std::vector<double> GMM_m_s_;
     123             : // model GMM - list of atom weights - one per atom type
     124             :   std::vector<double> GMM_m_w_;
     125             : // data GMM - means, weights, and covariances + beta option
     126             :   std::vector<Vector>             GMM_d_m_;
     127             :   std::vector<double>             GMM_d_w_;
     128             :   std::vector< VectorGeneric<6> > GMM_d_cov_;
     129             :   std::vector<int>                GMM_d_beta_;
     130             :   std::vector < std::vector<int> >     GMM_d_grps_;
     131             : // overlaps
     132             :   std::vector<double> ovmd_;
     133             :   std::vector<double> ovdd_;
     134             :   std::vector<double> ovmd_ave_;
     135             : // and derivatives
     136             :   std::vector<Vector> ovmd_der_;
     137             :   std::vector<Vector> atom_der_;
     138             :   std::vector<double> GMMid_der_;
     139             : // constants
     140             :   double cfact_;
     141             :   double inv_sqrt2_, sqrt2_pi_;
     142             : // metainference
     143             :   unsigned nrep_;
     144             :   unsigned replica_;
     145             :   std::vector<double> sigma_;
     146             :   std::vector<double> sigma_min_;
     147             :   std::vector<double> sigma_max_;
     148             :   std::vector<double> dsigma_;
     149             : // list of prefactors for overlap between two Gaussians
     150             : // pre_fact = 1.0 / (2pi)**1.5 / std::sqrt(det_md) * Wm * Wd
     151             :   std::vector<double> pre_fact_;
     152             : // inverse of the sum of model and data covariances matrices
     153             :   std::vector< VectorGeneric<6> > inv_cov_md_;
     154             : // neighbor list
     155             :   double   nl_cutoff_;
     156             :   unsigned nl_stride_;
     157             :   bool first_time_;
     158             :   bool no_aver_;
     159             :   std::vector<unsigned> nl_;
     160             : // parallel stuff
     161             :   unsigned size_;
     162             :   unsigned rank_;
     163             : // pbc
     164             :   bool pbc_;
     165             : // Monte Carlo stuff
     166             :   int      MCstride_;
     167             :   double   MCaccept_;
     168             :   double   MCtrials_;
     169             :   Random   random_;
     170             :   // status stuff
     171             :   unsigned int statusstride_;
     172             :   std::string       statusfilename_;
     173             :   OFile        statusfile_;
     174             :   bool         first_status_;
     175             :   // regression
     176             :   unsigned nregres_;
     177             :   double scale_;
     178             :   double scale_min_;
     179             :   double scale_max_;
     180             :   double dscale_;
     181             :   // tabulated exponential
     182             :   double dpcutoff_;
     183             :   double dexp_;
     184             :   unsigned nexp_;
     185             :   std::vector<double> tab_exp_;
     186             :   // simulated annealing
     187             :   unsigned nanneal_;
     188             :   double   kanneal_;
     189             :   double   anneal_;
     190             :   // prior exponent
     191             :   double prior_;
     192             :   // noise type
     193             :   unsigned noise_;
     194             :   // total score and virial;
     195             :   double ene_;
     196             :   Tensor virial_;
     197             :   // model overlap file
     198             :   unsigned int ovstride_;
     199             :   std::string       ovfilename_;
     200             : 
     201             :   // Reweighting additions
     202             :   bool do_reweight_;
     203             :   bool first_time_w_;
     204             :   std::vector<double> forces;
     205             :   std::vector<double> forcesToApply;
     206             : 
     207             :   // average weights
     208             :   double decay_w_;
     209             :   std::vector<double> average_weights_;
     210             : 
     211             : // write file with model overlap
     212             :   void write_model_overlap(long long int step);
     213             : // get median of std::vector
     214             :   double get_median(std::vector<double> &v);
     215             : // annealing
     216             :   double get_annealing(long long int step);
     217             : // do regression
     218             :   double scaleEnergy(double s);
     219             :   double doRegression();
     220             : // read and write status
     221             :   void read_status();
     222             :   void print_status(long long int step);
     223             : // accept or reject
     224             :   bool doAccept(double oldE, double newE, double kbt);
     225             : // do MonteCarlo
     226             :   void doMonteCarlo();
     227             : // read error file
     228             :   std::vector<double> read_exp_errors(const std::string & errfile);
     229             : // read experimental overlaps
     230             :   std::vector<double> read_exp_overlaps(const std::string & ovfile);
     231             : // calculate model GMM weights and covariances
     232             :   std::vector<double> get_GMM_m(std::vector<AtomNumber> &atoms);
     233             : // read data GMM file
     234             :   void get_GMM_d(const std::string & gmm_file);
     235             : // check GMM data
     236             :   void check_GMM_d(const VectorGeneric<6> &cov, double w);
     237             : // auxiliary method
     238             :   void calculate_useful_stuff(double reso);
     239             : // get pref_fact and inv_cov_md
     240             :   double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
     241             :                                 double GMM_w_0, double GMM_w_1,
     242             :                                 VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
     243             : // calculate self overlaps between data GMM components - ovdd_
     244             :   double get_self_overlap(unsigned id);
     245             : // calculate overlap between two Gaussians
     246             :   double get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
     247             :                      const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
     248             : // calculate exponent of overlap for neighbor list update
     249             :   double get_exp_overlap(const Vector &m_m, const Vector &d_m,
     250             :                          const VectorGeneric<6> &inv_cov_md);
     251             : // update the neighbor list
     252             :   void update_neighbor_list();
     253             : // calculate overlap
     254             :   void calculate_overlap();
     255             : // Gaussian noise
     256             :   void calculate_Gauss();
     257             : // Outliers noise
     258             :   void calculate_Outliers();
     259             : // Marginal noise
     260             :   void calculate_Marginal();
     261             : 
     262             :   // See MetainferenceBase
     263             :   void get_weights(double &weight, double &norm, double &neff);
     264             : 
     265             : public:
     266             :   static void registerKeywords( Keywords& keys );
     267             :   explicit EMMI(const ActionOptions&);
     268             :   // needed for reweighting
     269             :   void setDerivatives();
     270             :   void turnOnDerivatives() override;
     271             :   unsigned getNumberOfDerivatives() override;
     272             :   void lockRequests() override;
     273             :   void unlockRequests() override;
     274             :   void calculateNumericalDerivatives( ActionWithValue* a ) override;
     275             :   void apply() override;
     276             :   void setArgDerivatives(Value *v, const double &d);
     277             :   void setAtomsDerivatives(Value*v, const unsigned i, const Vector&d);
     278             :   void setBoxDerivatives(Value*v, const Tensor&d);
     279             : // active methods:
     280             :   void prepare() override;
     281             :   void calculate() override;
     282             : };
     283             : 
     284             : inline
     285          20 : void EMMI::setDerivatives() {
     286             :   // Get appropriate number of derivatives
     287             :   // Derivatives are first for arguments and then for atoms
     288             :   unsigned nder;
     289          20 :   if( getNumberOfAtoms()>0 ) {
     290          20 :     nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
     291             :   } else {
     292           0 :     nder = getNumberOfArguments();
     293             :   }
     294             : 
     295             :   // Resize all derivative arrays
     296          20 :   forces.resize( nder ); forcesToApply.resize( nder );
     297          92 :   for(int i=0; i<getNumberOfComponents(); ++i) getPntrToComponent(i)->resizeDerivatives(nder);
     298          20 : }
     299             : 
     300             : inline
     301          20 : void EMMI::turnOnDerivatives() {
     302          20 :   ActionWithValue::turnOnDerivatives();
     303          20 : }
     304             : 
     305             : inline
     306          72 : unsigned EMMI::getNumberOfDerivatives() {
     307          72 :   if( getNumberOfAtoms()>0 ) {
     308          72 :     return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
     309             :   }
     310           0 :   return getNumberOfArguments();
     311             : }
     312             : 
     313             : inline
     314          30 : void EMMI::lockRequests() {
     315             :   ActionAtomistic::lockRequests();
     316             :   ActionWithArguments::lockRequests();
     317          30 : }
     318             : 
     319             : inline
     320          30 : void EMMI::unlockRequests() {
     321             :   ActionAtomistic::unlockRequests();
     322             :   ActionWithArguments::unlockRequests();
     323          30 : }
     324             : 
     325             : inline
     326           9 : void EMMI::calculateNumericalDerivatives( ActionWithValue* a=NULL ) {
     327           9 :   if( getNumberOfArguments()>0 ) {
     328           0 :     ActionWithArguments::calculateNumericalDerivatives( a );
     329             :   }
     330           9 :   if( getNumberOfAtoms()>0 ) {
     331           9 :     Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
     332          39 :     for(int j=0; j<getNumberOfComponents(); ++j) {
     333          30 :       for(unsigned i=0; i<getNumberOfArguments(); ++i) if(getPntrToComponent(j)->hasDerivatives()) save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
     334             :     }
     335           9 :     calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
     336          39 :     for(int j=0; j<getNumberOfComponents(); ++j) {
     337          30 :       for(unsigned i=0; i<getNumberOfArguments(); ++i) if(getPntrToComponent(j)->hasDerivatives()) getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
     338             :     }
     339             :   }
     340           9 : }
     341             : 
     342             : inline
     343          30 : void EMMI::apply() {
     344          30 :   bool wasforced=false; forcesToApply.assign(forcesToApply.size(),0.0);
     345         162 :   for(int i=0; i<getNumberOfComponents(); ++i) {
     346         132 :     if( getPntrToComponent(i)->applyForce( forces ) ) {
     347             :       wasforced=true;
     348           0 :       for(unsigned i=0; i<forces.size(); ++i) forcesToApply[i]+=forces[i];
     349             :     }
     350             :   }
     351          30 :   if( wasforced ) {
     352           0 :     unsigned ind=0; addForcesOnArguments( 0, forcesToApply, ind, getLabel() );
     353           0 :     if( getNumberOfAtoms()>0 ) setForcesOnAtoms( forcesToApply, ind );
     354             :   }
     355          30 : }
     356             : 
     357             : inline
     358             : void EMMI::setArgDerivatives(Value *v, const double &d) {
     359             :   v->addDerivative(0,d);
     360             : }
     361             : 
     362             : inline
     363     9844626 : void EMMI::setAtomsDerivatives(Value*v, const unsigned i, const Vector&d) {
     364     9844626 :   const unsigned noa=getNumberOfArguments();
     365     9844626 :   v->addDerivative(noa+3*i+0,d[0]);
     366     9844626 :   v->addDerivative(noa+3*i+1,d[1]);
     367     9844626 :   v->addDerivative(noa+3*i+2,d[2]);
     368     9844626 : }
     369             : 
     370             : inline
     371       16365 : void EMMI::setBoxDerivatives(Value* v,const Tensor&d) {
     372       16365 :   const unsigned noa=getNumberOfArguments();
     373             :   const unsigned nat=getNumberOfAtoms();
     374       16365 :   v->addDerivative(noa+3*nat+0,d(0,0));
     375       16365 :   v->addDerivative(noa+3*nat+1,d(0,1));
     376       16365 :   v->addDerivative(noa+3*nat+2,d(0,2));
     377       16365 :   v->addDerivative(noa+3*nat+3,d(1,0));
     378       16365 :   v->addDerivative(noa+3*nat+4,d(1,1));
     379       16365 :   v->addDerivative(noa+3*nat+5,d(1,2));
     380       16365 :   v->addDerivative(noa+3*nat+6,d(2,0));
     381       16365 :   v->addDerivative(noa+3*nat+7,d(2,1));
     382       16365 :   v->addDerivative(noa+3*nat+8,d(2,2));
     383       16365 : }
     384             : 
     385             : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
     386             : 
     387          22 : void EMMI::registerKeywords( Keywords& keys ) {
     388          22 :   Action::registerKeywords( keys );
     389          22 :   ActionAtomistic::registerKeywords( keys );
     390          22 :   ActionWithValue::registerKeywords( keys );
     391          22 :   ActionWithArguments::registerKeywords( keys );
     392          44 :   keys.addInputKeyword("optional","ARG","scalar","the labels of the values from which the function is calculated");
     393          44 :   keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
     394          44 :   keys.addFlag("NOPBC",false,"ignore the periodic boundary conditions when calculating distances");
     395          44 :   keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
     396          44 :   keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
     397          44 :   keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
     398          44 :   keys.add("compulsory","SIGMA_MIN","minimum uncertainty");
     399          44 :   keys.add("compulsory","RESOLUTION", "Cryo-EM map resolution");
     400          44 :   keys.add("compulsory","NOISETYPE","functional form of the noise (GAUSS, OUTLIERS, MARGINAL)");
     401          44 :   keys.add("optional","SIGMA0","initial value of the uncertainty");
     402          44 :   keys.add("optional","DSIGMA","MC step for uncertainties");
     403          44 :   keys.add("optional","MC_STRIDE", "Monte Carlo stride");
     404          44 :   keys.add("optional","ERR_FILE","file with experimental or GMM fit errors");
     405          44 :   keys.add("optional","OV_FILE","file with experimental overlaps");
     406          44 :   keys.add("optional","NORM_DENSITY","integral of the experimental density");
     407          44 :   keys.add("optional","STATUS_FILE","write a file with all the data useful for restart");
     408          44 :   keys.add("optional","WRITE_STRIDE","write the status to a file every N steps, this can be used for restart");
     409          44 :   keys.add("optional","REGRESSION","regression stride");
     410          44 :   keys.add("optional","REG_SCALE_MIN","regression minimum scale");
     411          44 :   keys.add("optional","REG_SCALE_MAX","regression maximum scale");
     412          44 :   keys.add("optional","REG_DSCALE","regression maximum scale MC move");
     413          44 :   keys.add("optional","SCALE","scale factor");
     414          44 :   keys.add("optional","ANNEAL", "Length of annealing cycle");
     415          44 :   keys.add("optional","ANNEAL_FACT", "Annealing temperature factor");
     416          44 :   keys.add("optional","TEMP","temperature");
     417          44 :   keys.add("optional","PRIOR", "exponent of uncertainty prior");
     418          44 :   keys.add("optional","WRITE_OV_STRIDE","write model overlaps every N steps");
     419          44 :   keys.add("optional","WRITE_OV","write a file with model overlaps");
     420          44 :   keys.add("optional","AVERAGING", "Averaging window for weights");
     421          44 :   keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
     422          44 :   keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the ARG as energy");
     423          44 :   keys.addOutputComponent("scoreb","default","scalar","Bayesian score");
     424          44 :   keys.addOutputComponent("acc",   "NOISETYPE","scalar","MC acceptance for uncertainty");
     425          44 :   keys.addOutputComponent("scale", "REGRESSION","scalar","scale factor");
     426          44 :   keys.addOutputComponent("accscale", "REGRESSION","scalar","MC acceptance for scale regression");
     427          44 :   keys.addOutputComponent("enescale", "REGRESSION","scalar","MC energy for scale regression");
     428          44 :   keys.addOutputComponent("anneal","ANNEAL","scalar","annealing factor");
     429          44 :   keys.addOutputComponent("weight",       "REWEIGHT","scalar",     "weights of the weighted average");
     430          44 :   keys.addOutputComponent("biasDer",      "REWEIGHT","scalar",     "derivatives with respect to the bias");
     431          44 :   keys.addOutputComponent("sigma",      "NOISETYPE","scalar",     "uncertainty in the forward models and experiment");
     432          44 :   keys.addOutputComponent("neff",         "default","scalar",      "effective number of replicas");
     433          22 : }
     434             : 
     435          20 : EMMI::EMMI(const ActionOptions&ao):
     436             :   Action(ao),
     437             :   ActionAtomistic(ao),
     438             :   ActionWithArguments(ao),
     439             :   ActionWithValue(ao),
     440          20 :   inv_sqrt2_(0.707106781186548),
     441          20 :   sqrt2_pi_(0.797884560802865),
     442          20 :   first_time_(true), no_aver_(false), pbc_(true),
     443          20 :   MCstride_(1), MCaccept_(0.), MCtrials_(0.),
     444          40 :   statusstride_(0), first_status_(true),
     445          20 :   nregres_(0), scale_(1.),
     446          20 :   dpcutoff_(15.0), nexp_(1000000), nanneal_(0),
     447          40 :   kanneal_(0.), anneal_(1.), prior_(1.), ovstride_(0),
     448          20 :   do_reweight_(false), first_time_w_(true), decay_w_(1.)
     449             : {
     450             :   // periodic boundary conditions
     451          20 :   bool nopbc=!pbc_;
     452          20 :   parseFlag("NOPBC",nopbc);
     453          20 :   pbc_=!nopbc;
     454             : 
     455             :   // list of atoms
     456             :   std::vector<AtomNumber> atoms;
     457          40 :   parseAtomList("ATOMS", atoms);
     458             : 
     459             :   // file with data GMM
     460             :   std::string GMM_file;
     461          40 :   parse("GMM_FILE", GMM_file);
     462             : 
     463             :   // type of data noise
     464             :   std::string noise;
     465          40 :   parse("NOISETYPE",noise);
     466          20 :   if      (noise=="GAUSS")   noise_ = 0;
     467          12 :   else if(noise=="OUTLIERS") noise_ = 1;
     468           6 :   else if(noise=="MARGINAL") noise_ = 2;
     469           0 :   else error("Unknown noise type!");
     470             : 
     471             :   // minimum value for error
     472             :   double sigma_min;
     473          20 :   parse("SIGMA_MIN", sigma_min);
     474          20 :   if(sigma_min<0) error("SIGMA_MIN should be greater or equal to zero");
     475             : 
     476             :   // the following parameters must be specified with noise type 0 and 1
     477             :   double sigma_ini, dsigma;
     478          20 :   if(noise_!=2) {
     479             :     // initial value of the uncertainty
     480          14 :     parse("SIGMA0", sigma_ini);
     481          14 :     if(sigma_ini<=0) error("you must specify a positive SIGMA0");
     482             :     // MC parameters
     483          14 :     parse("DSIGMA", dsigma);
     484          14 :     if(dsigma<0) error("you must specify a positive DSIGMA");
     485          14 :     parse("MC_STRIDE", MCstride_);
     486          14 :     if(dsigma>0 && MCstride_<=0) error("you must specify a positive MC_STRIDE");
     487             :     // status file parameters
     488          14 :     parse("WRITE_STRIDE", statusstride_);
     489          14 :     if(statusstride_==0) error("you must specify a positive WRITE_STRIDE");
     490          28 :     parse("STATUS_FILE",  statusfilename_);
     491          28 :     if(statusfilename_=="") statusfilename_ = "MISTATUS"+getLabel();
     492           0 :     else                    statusfilename_ = statusfilename_+getLabel();
     493             :   }
     494             : 
     495             :   // error file
     496             :   std::string errfile;
     497          40 :   parse("ERR_FILE", errfile);
     498             : 
     499             :   // file with experimental overlaps
     500             :   std::string ovfile;
     501          20 :   parse("OV_FILE", ovfile);
     502             : 
     503             :   // integral of the experimetal density
     504          20 :   double norm_d = 0.0;
     505          20 :   parse("NORM_DENSITY", norm_d);
     506             : 
     507             :   // temperature
     508          20 :   kbt_ = getkBT();
     509             : 
     510             :   // exponent of uncertainty prior
     511          20 :   parse("PRIOR",prior_);
     512             : 
     513             :   // simulated annealing stuff
     514          20 :   parse("ANNEAL", nanneal_);
     515          20 :   parse("ANNEAL_FACT", kanneal_);
     516          20 :   if(nanneal_>0 && kanneal_<=1.0) error("with ANNEAL, ANNEAL_FACT must be greater than 1");
     517             : 
     518             :   // regression stride
     519          20 :   parse("REGRESSION",nregres_);
     520             :   // other regression parameters
     521          20 :   if(nregres_>0) {
     522           0 :     parse("REG_SCALE_MIN",scale_min_);
     523           0 :     parse("REG_SCALE_MAX",scale_max_);
     524           0 :     parse("REG_DSCALE",dscale_);
     525             :     // checks
     526           0 :     if(scale_max_<=scale_min_) error("with REGRESSION, REG_SCALE_MAX must be greater than REG_SCALE_MIN");
     527           0 :     if(dscale_<=0.) error("with REGRESSION, REG_DSCALE must be positive");
     528             :   }
     529             : 
     530             :   // scale factor
     531          20 :   parse("SCALE", scale_);
     532             : 
     533             :   // read map resolution
     534             :   double reso;
     535          20 :   parse("RESOLUTION", reso);
     536          20 :   if(reso<=0.) error("RESOLUTION should be strictly positive");
     537             : 
     538             :   // neighbor list stuff
     539          20 :   parse("NL_CUTOFF",nl_cutoff_);
     540          20 :   if(nl_cutoff_<=0.0) error("NL_CUTOFF should be explicitly specified and positive");
     541          20 :   parse("NL_STRIDE",nl_stride_);
     542          20 :   if(nl_stride_==0) error("NL_STRIDE should be explicitly specified and positive");
     543             : 
     544             :   // averaging or not
     545          20 :   parseFlag("NO_AVER",no_aver_);
     546             : 
     547             :   // write overlap file
     548          20 :   parse("WRITE_OV_STRIDE", ovstride_);
     549          20 :   parse("WRITE_OV", ovfilename_);
     550          20 :   if(ovstride_>0 && ovfilename_=="") error("With WRITE_OV_STRIDE you must specify WRITE_OV");
     551             : 
     552             :   // set parallel stuff
     553          20 :   size_=comm.Get_size();
     554          20 :   rank_=comm.Get_rank();
     555             : 
     556             :   // get number of replicas
     557          20 :   if(rank_==0) {
     558          14 :     if(no_aver_) {
     559          12 :       nrep_ = 1;
     560             :     } else {
     561           2 :       nrep_ = multi_sim_comm.Get_size();
     562             :     }
     563          14 :     replica_ = multi_sim_comm.Get_rank();
     564             :   } else {
     565           6 :     nrep_ = 0;
     566           6 :     replica_ = 0;
     567             :   }
     568          20 :   comm.Sum(&nrep_,1);
     569          20 :   comm.Sum(&replica_,1);
     570             : 
     571             :   // Reweighting flag
     572          20 :   parseFlag("REWEIGHT", do_reweight_);
     573          20 :   if(do_reweight_&&getNumberOfArguments()!=1) error("To REWEIGHT one must provide one single bias as an argument");
     574          20 :   if(do_reweight_&&no_aver_) error("REWEIGHT cannot be used with NO_AVER");
     575          20 :   if(do_reweight_&&nrep_<2) error("REWEIGHT can only be used in parallel with 2 or more replicas");
     576          20 :   if(!getRestart()) average_weights_.resize(nrep_, 1./static_cast<double>(nrep_));
     577           0 :   else average_weights_.resize(nrep_, 0.);
     578             : 
     579          20 :   unsigned averaging=0;
     580          20 :   parse("AVERAGING", averaging);
     581          20 :   if(averaging>0) {
     582           2 :     decay_w_ = 1./static_cast<double> (averaging);
     583             :   }
     584             : 
     585          20 :   checkRead();
     586             : 
     587          20 :   log.printf("  atoms involved : ");
     588       10876 :   for(unsigned i=0; i<atoms.size(); ++i) log.printf("%d ",atoms[i].serial());
     589          20 :   log.printf("\n");
     590          20 :   log.printf("  GMM data file : %s\n", GMM_file.c_str());
     591          20 :   if(no_aver_) log.printf("  without ensemble averaging\n");
     592          20 :   log.printf("  type of data noise : %s\n", noise.c_str());
     593          20 :   log.printf("  neighbor list cutoff : %lf\n", nl_cutoff_);
     594          20 :   log.printf("  neighbor list stride : %u\n",  nl_stride_);
     595          20 :   log.printf("  minimum uncertainty : %f\n",sigma_min);
     596          20 :   log.printf("  scale factor : %lf\n",scale_);
     597          20 :   if(nregres_>0) {
     598           0 :     log.printf("  regression stride : %u\n", nregres_);
     599           0 :     log.printf("  regression minimum scale : %lf\n", scale_min_);
     600           0 :     log.printf("  regression maximum scale : %lf\n", scale_max_);
     601           0 :     log.printf("  regression maximum scale MC move : %lf\n", dscale_);
     602             :   }
     603          20 :   if(noise_!=2) {
     604          14 :     log.printf("  initial value of the uncertainty : %f\n",sigma_ini);
     605          14 :     log.printf("  max MC move in uncertainty : %f\n",dsigma);
     606          14 :     log.printf("  MC stride : %u\n", MCstride_);
     607          14 :     log.printf("  reading/writing to status file : %s\n",statusfilename_.c_str());
     608          14 :     log.printf("  with stride : %u\n",statusstride_);
     609             :   }
     610          20 :   if(errfile.size()>0) log.printf("  reading experimental errors from file : %s\n", errfile.c_str());
     611          20 :   if(ovfile.size()>0)  log.printf("  reading experimental overlaps from file : %s\n", ovfile.c_str());
     612          20 :   log.printf("  temperature of the system in energy unit : %f\n",kbt_);
     613          20 :   log.printf("  prior exponent : %f\n",prior_);
     614          20 :   log.printf("  number of replicas for averaging: %u\n",nrep_);
     615          20 :   log.printf("  id of the replica : %u\n",replica_);
     616          20 :   if(nanneal_>0) {
     617           0 :     log.printf("  length of annealing cycle : %u\n",nanneal_);
     618           0 :     log.printf("  annealing factor : %f\n",kanneal_);
     619             :   }
     620          20 :   if(ovstride_>0) {
     621           0 :     log.printf("  stride for writing model overlaps : %u\n",ovstride_);
     622           0 :     log.printf("  file for writing model overlaps : %s\n", ovfilename_.c_str());
     623             :   }
     624             : 
     625             :   // set constant quantity before calculating stuff
     626          20 :   cfact_ = 1.0/pow( 2.0*pi, 1.5 );
     627             : 
     628             :   // calculate model GMM constant parameters
     629          20 :   std::vector<double> GMM_m_w = get_GMM_m(atoms);
     630             : 
     631             :   // read data GMM parameters
     632          20 :   get_GMM_d(GMM_file);
     633          20 :   log.printf("  number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
     634             : 
     635             :   // normalize atom weight map
     636          40 :   if(norm_d <= 0.0) norm_d = accumulate(GMM_d_w_.begin(), GMM_d_w_.end(), 0.0);
     637             :   double norm_m = accumulate(GMM_m_w.begin(),  GMM_m_w.end(),  0.0);
     638             :   // renormalization
     639         100 :   for(unsigned i=0; i<GMM_m_w_.size(); ++i) GMM_m_w_[i] *= norm_d / norm_m;
     640             : 
     641             :   // read experimental errors
     642             :   std::vector<double> exp_err;
     643          20 :   if(errfile.size()>0) exp_err = read_exp_errors(errfile);
     644             : 
     645             :   // get self overlaps between data GMM components
     646          20 :   if(ovfile.size()>0) {
     647           0 :     ovdd_ = read_exp_overlaps(ovfile);
     648             :   } else {
     649        1982 :     for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
     650        1962 :       double ov = get_self_overlap(i);
     651        1962 :       ovdd_.push_back(ov);
     652             :     }
     653             :   }
     654             : 
     655          20 :   log.printf("  number of GMM groups : %u\n", static_cast<unsigned>(GMM_d_grps_.size()));
     656             :   // cycle on GMM groups
     657          40 :   for(unsigned Gid=0; Gid<GMM_d_grps_.size(); ++Gid) {
     658          20 :     log.printf("    group %d\n", Gid);
     659             :     // calculate median overlap and experimental error
     660             :     std::vector<double> ovdd;
     661             :     std::vector<double> err;
     662             :     // cycle on the group members
     663        1982 :     for(unsigned i=0; i<GMM_d_grps_[Gid].size(); ++i) {
     664             :       // GMM id
     665        1962 :       int GMMid = GMM_d_grps_[Gid][i];
     666             :       // add to experimental error
     667        1962 :       if(errfile.size()>0) err.push_back(exp_err[GMMid]);
     668        1962 :       else                 err.push_back(0.);
     669             :       // add to GMM overlap
     670        1962 :       ovdd.push_back(ovdd_[GMMid]);
     671             :     }
     672             :     // calculate median quantities
     673          20 :     double ovdd_m = get_median(ovdd);
     674          20 :     double err_m  = get_median(err);
     675             :     // print out statistics
     676          20 :     log.printf("     # of members : %zu\n", GMM_d_grps_[Gid].size());
     677          20 :     log.printf("     median overlap : %lf\n", ovdd_m);
     678          20 :     log.printf("     median error : %lf\n", err_m);
     679             :     // add minimum value of sigma for this group of GMMs
     680          20 :     sigma_min_.push_back(std::sqrt(err_m*err_m+sigma_min*ovdd_m*sigma_min*ovdd_m));
     681             :     // these are only needed with Gaussian and Outliers noise models
     682          20 :     if(noise_!=2) {
     683             :       // set dsigma
     684          14 :       dsigma_.push_back(dsigma * ovdd_m);
     685             :       // set sigma max
     686          14 :       sigma_max_.push_back(10.0*ovdd_m + sigma_min_[Gid] + dsigma_[Gid]);
     687             :       // initialize sigma
     688          28 :       sigma_.push_back(std::max(sigma_min_[Gid],std::min(sigma_ini*ovdd_m,sigma_max_[Gid])));
     689             :     }
     690             :   }
     691             : 
     692             :   // read status file if restarting
     693          20 :   if(getRestart() && noise_!=2) read_status();
     694             : 
     695             :   // calculate auxiliary stuff
     696          20 :   calculate_useful_stuff(reso);
     697             : 
     698             :   // prepare data and derivative std::vectors
     699          20 :   ovmd_.resize(ovdd_.size());
     700          20 :   ovmd_ave_.resize(ovdd_.size());
     701          20 :   atom_der_.resize(GMM_m_type_.size());
     702          20 :   GMMid_der_.resize(ovdd_.size());
     703             : 
     704             :   // clear things that are no longer needed
     705             :   GMM_d_cov_.clear();
     706             : 
     707             :   // add components
     708          40 :   addComponentWithDerivatives("scoreb"); componentIsNotPeriodic("scoreb");
     709             : 
     710          48 :   if(noise_!=2) {addComponent("acc"); componentIsNotPeriodic("acc");}
     711             : 
     712          20 :   if(nregres_>0) {
     713           0 :     addComponent("scale");     componentIsNotPeriodic("scale");
     714           0 :     addComponent("accscale");  componentIsNotPeriodic("accscale");
     715           0 :     addComponent("enescale");  componentIsNotPeriodic("enescale");
     716             :   }
     717             : 
     718          20 :   if(nanneal_>0) {addComponent("anneal"); componentIsNotPeriodic("anneal");}
     719             : 
     720          20 :   if(do_reweight_) {
     721           4 :     addComponent("biasDer");
     722           4 :     componentIsNotPeriodic("biasDer");
     723           4 :     addComponent("weight");
     724           4 :     componentIsNotPeriodic("weight");
     725             :   }
     726             : 
     727          40 :   addComponent("neff");
     728          20 :   componentIsNotPeriodic("neff");
     729             : 
     730          34 :   for(unsigned i=0; i<sigma_.size(); ++i) {
     731          14 :     std::string num; Tools::convert(i, num);
     732          28 :     addComponent("sigma-"+num); componentIsNotPeriodic("sigma-"+num);
     733          28 :     getPntrToComponent("sigma-"+num)->set(sigma_[i]);
     734             :   }
     735             : 
     736             :   // initialize random seed
     737             :   unsigned iseed;
     738          20 :   if(rank_==0) iseed = time(NULL)+replica_;
     739           6 :   else iseed = 0;
     740          20 :   comm.Sum(&iseed, 1);
     741          20 :   random_.setSeed(-iseed);
     742             : 
     743             :   // request the atoms
     744          20 :   requestAtoms(atoms);
     745          20 :   setDerivatives();
     746             : 
     747             :   // print bibliography
     748          40 :   log<<"  Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
     749          40 :   log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
     750          40 :   log<<plumed.cite("Bonomi, Pellarin, Vendruscolo, Biophys. J. 114, 1604 (2018)");
     751          22 :   if(do_reweight_) log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
     752          22 :   if(!no_aver_ && nrep_>1)log<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
     753          20 :   log<<"\n";
     754          20 : }
     755             : 
     756           0 : void EMMI::write_model_overlap(long long int step)
     757             : {
     758           0 :   OFile ovfile;
     759           0 :   ovfile.link(*this);
     760           0 :   std::string num; Tools::convert(step,num);
     761           0 :   std::string name = ovfilename_+"-"+num;
     762           0 :   ovfile.open(name);
     763             :   ovfile.setHeavyFlush();
     764           0 :   ovfile.fmtField("%10.7e ");
     765             : // write overlaps
     766           0 :   for(int i=0; i<ovmd_.size(); ++i) {
     767           0 :     ovfile.printField("Model", ovmd_[i]);
     768           0 :     ovfile.printField("ModelScaled", scale_ * ovmd_[i]);
     769           0 :     ovfile.printField("Data", ovdd_[i]);
     770           0 :     ovfile.printField();
     771             :   }
     772           0 :   ovfile.close();
     773           0 : }
     774             : 
     775          40 : double EMMI::get_median(std::vector<double> &v)
     776             : {
     777             : // dimension of std::vector
     778          40 :   unsigned size = v.size();
     779             : // in case of only one entry
     780          40 :   if (size==1) {
     781           0 :     return v[0];
     782             :   } else {
     783             :     // reorder std::vector
     784          40 :     sort(v.begin(), v.end());
     785             :     // odd or even?
     786          40 :     if (size%2==0) {
     787           4 :       return (v[size/2-1]+v[size/2])/2.0;
     788             :     } else {
     789          36 :       return v[size/2];
     790             :     }
     791             :   }
     792             : }
     793             : 
     794           0 : void EMMI::read_status()
     795             : {
     796             :   double MDtime;
     797             : // open file
     798             :   auto ifile = Tools::make_unique<IFile>();
     799           0 :   ifile->link(*this);
     800           0 :   if(ifile->FileExist(statusfilename_)) {
     801           0 :     ifile->open(statusfilename_);
     802           0 :     while(ifile->scanField("MD_time", MDtime)) {
     803           0 :       for(unsigned i=0; i<sigma_.size(); ++i) {
     804             :         // convert i to std::string
     805           0 :         std::string num; Tools::convert(i,num);
     806             :         // read entries
     807           0 :         ifile->scanField("s"+num, sigma_[i]);
     808             :       }
     809             :       // new line
     810           0 :       ifile->scanField();
     811             :     }
     812           0 :     ifile->close();
     813             :   } else {
     814           0 :     error("Cannot find status file "+statusfilename_+"\n");
     815             :   }
     816           0 : }
     817             : 
     818       10904 : void EMMI::print_status(long long int step)
     819             : {
     820             : // if first time open the file
     821       10904 :   if(first_status_) {
     822          14 :     first_status_ = false;
     823          14 :     statusfile_.link(*this);
     824          14 :     statusfile_.open(statusfilename_);
     825             :     statusfile_.setHeavyFlush();
     826          28 :     statusfile_.fmtField("%6.3e ");
     827             :   }
     828             : // write fields
     829       10904 :   double MDtime = static_cast<double>(step)*getTimeStep();
     830       10904 :   statusfile_.printField("MD_time", MDtime);
     831       21808 :   for(unsigned i=0; i<sigma_.size(); ++i) {
     832             :     // convert i to std::string
     833       10904 :     std::string num; Tools::convert(i,num);
     834             :     // print entry
     835       21808 :     statusfile_.printField("s"+num, sigma_[i]);
     836             :   }
     837       10904 :   statusfile_.printField();
     838       10904 : }
     839             : 
     840           0 : bool EMMI::doAccept(double oldE, double newE, double kbt) {
     841             :   bool accept = false;
     842             :   // calculate delta energy
     843           0 :   double delta = ( newE - oldE ) / kbt;
     844             :   // if delta is negative always accept move
     845           0 :   if( delta < 0.0 ) {
     846             :     accept = true;
     847             :   } else {
     848             :     // otherwise extract random number
     849           0 :     double s = random_.RandU01();
     850           0 :     if( s < std::exp(-delta) ) { accept = true; }
     851             :   }
     852           0 :   return accept;
     853             : }
     854             : 
     855           0 : void EMMI::doMonteCarlo()
     856             : {
     857             :   // extract random GMM group
     858           0 :   unsigned nGMM = static_cast<unsigned>(std::floor(random_.RandU01()*static_cast<double>(GMM_d_grps_.size())));
     859           0 :   if(nGMM==GMM_d_grps_.size()) nGMM -= 1;
     860             : 
     861             :   // generate random move
     862           0 :   double shift = dsigma_[nGMM] * ( 2.0 * random_.RandU01() - 1.0 );
     863             :   // new sigma
     864           0 :   double new_s = sigma_[nGMM] + shift;
     865             :   // check boundaries
     866           0 :   if(new_s > sigma_max_[nGMM]) {new_s = 2.0 * sigma_max_[nGMM] - new_s;}
     867           0 :   if(new_s < sigma_min_[nGMM]) {new_s = 2.0 * sigma_min_[nGMM] - new_s;}
     868             :   // old s2
     869           0 :   double old_inv_s2 = 1.0 / sigma_[nGMM] / sigma_[nGMM];
     870             :   // new s2
     871           0 :   double new_inv_s2 = 1.0 / new_s / new_s;
     872             : 
     873             :   // cycle on GMM group and calculate old and new energy
     874             :   double old_ene = 0.0;
     875             :   double new_ene = 0.0;
     876           0 :   double ng = static_cast<double>(GMM_d_grps_[nGMM].size());
     877             : 
     878             :   // in case of Gaussian noise
     879           0 :   if(noise_==0) {
     880             :     double chi2 = 0.0;
     881           0 :     for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
     882             :       // id GMM component
     883           0 :       int GMMid = GMM_d_grps_[nGMM][i];
     884             :       // deviation
     885           0 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
     886             :       // add to chi2
     887           0 :       chi2 += dev * dev;
     888             :     }
     889             :     // final energy calculation: add normalization and prior
     890           0 :     old_ene = 0.5 * kbt_ * ( chi2 * old_inv_s2 - (ng+prior_) * std::log(old_inv_s2) );
     891           0 :     new_ene = 0.5 * kbt_ * ( chi2 * new_inv_s2 - (ng+prior_) * std::log(new_inv_s2) );
     892             :   }
     893             : 
     894             :   // in case of Outliers noise
     895           0 :   if(noise_==1) {
     896           0 :     for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
     897             :       // id GMM component
     898           0 :       int GMMid = GMM_d_grps_[nGMM][i];
     899             :       // calculate deviation
     900           0 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
     901             :       // add to energies
     902           0 :       old_ene += std::log1p( 0.5 * dev * dev * old_inv_s2);
     903           0 :       new_ene += std::log1p( 0.5 * dev * dev * new_inv_s2);
     904             :     }
     905             :     // final energy calculation: add normalization and prior
     906           0 :     old_ene = kbt_ * ( old_ene + (ng+prior_) * std::log(sigma_[nGMM]) );
     907           0 :     new_ene = kbt_ * ( new_ene + (ng+prior_) * std::log(new_s) );
     908             :   }
     909             : 
     910             :   // increment number of trials
     911           0 :   MCtrials_ += 1.0;
     912             : 
     913             :   // accept or reject
     914           0 :   bool accept = doAccept(old_ene/anneal_, new_ene/anneal_, kbt_);
     915           0 :   if(accept) {
     916           0 :     sigma_[nGMM] = new_s;
     917           0 :     MCaccept_ += 1.0;
     918             :   }
     919             :   // local communication
     920           0 :   if(rank_!=0) {
     921           0 :     for(unsigned i=0; i<sigma_.size(); ++i) sigma_[i] = 0.0;
     922           0 :     MCaccept_ = 0.0;
     923             :   }
     924           0 :   if(size_>1) {
     925           0 :     comm.Sum(&sigma_[0], sigma_.size());
     926           0 :     comm.Sum(&MCaccept_, 1);
     927             :   }
     928             : 
     929             :   // update sigma output
     930           0 :   std::string num; Tools::convert(nGMM, num);
     931           0 :   getPntrToComponent("sigma-"+num)->set(sigma_[nGMM]);
     932           0 : }
     933             : 
     934           0 : std::vector<double> EMMI::read_exp_errors(const std::string & errfile)
     935             : {
     936             :   int nexp, idcomp;
     937             :   double err;
     938             :   std::vector<double> exp_err;
     939             : // open file
     940           0 :   IFile *ifile = new IFile();
     941           0 :   if(ifile->FileExist(errfile)) {
     942           0 :     ifile->open(errfile);
     943             :     // scan for number of experimental errors
     944           0 :     ifile->scanField("Nexp", nexp);
     945             :     // cycle on GMM components
     946           0 :     while(ifile->scanField("Id",idcomp)) {
     947             :       // total experimental error
     948           0 :       double err_tot = 0.0;
     949             :       // cycle on number of experimental overlaps
     950           0 :       for(unsigned i=0; i<nexp; ++i) {
     951           0 :         std::string ss; Tools::convert(i,ss);
     952           0 :         ifile->scanField("Err"+ss, err);
     953             :         // add to total error
     954           0 :         err_tot += err*err;
     955             :       }
     956             :       // new line
     957           0 :       ifile->scanField();
     958             :       // calculate RMSE
     959           0 :       err_tot = std::sqrt(err_tot/static_cast<double>(nexp));
     960             :       // add to global
     961           0 :       exp_err.push_back(err_tot);
     962             :     }
     963           0 :     ifile->close();
     964             :   } else {
     965           0 :     error("Cannot find ERR_FILE "+errfile+"\n");
     966             :   }
     967           0 :   return exp_err;
     968             : }
     969             : 
     970           0 : std::vector<double> EMMI::read_exp_overlaps(const std::string & ovfile)
     971             : {
     972             :   int idcomp;
     973             :   double ov;
     974             :   std::vector<double> ovdd;
     975             : // open file
     976           0 :   IFile *ifile = new IFile();
     977           0 :   if(ifile->FileExist(ovfile)) {
     978           0 :     ifile->open(ovfile);
     979             :     // cycle on GMM components
     980           0 :     while(ifile->scanField("Id",idcomp)) {
     981             :       // read experimental overlap
     982           0 :       ifile->scanField("Overlap", ov);
     983             :       // add to ovdd
     984           0 :       ovdd.push_back(ov);
     985             :       // new line
     986           0 :       ifile->scanField();
     987             :     }
     988           0 :     ifile->close();
     989             :   } else {
     990           0 :     error("Cannot find OV_FILE "+ovfile+"\n");
     991             :   }
     992           0 :   return ovdd;
     993             : }
     994             : 
     995          20 : std::vector<double> EMMI::get_GMM_m(std::vector<AtomNumber> &atoms)
     996             : {
     997             :   // list of weights - one per atom
     998             :   std::vector<double> GMM_m_w;
     999             : 
    1000          20 :   auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
    1001             :   // map of atom types to A and B coefficients of scattering factor
    1002             :   // f(s) = A * exp(-B*s**2)
    1003             :   // B is in Angstrom squared
    1004             :   // map between an atom type and an index
    1005             :   std::map<std::string, unsigned> type_map;
    1006          20 :   type_map["C"]=0;
    1007          20 :   type_map["O"]=1;
    1008          20 :   type_map["N"]=2;
    1009          20 :   type_map["S"]=3;
    1010             :   // fill in sigma std::vector
    1011          20 :   GMM_m_s_.push_back(0.01*15.146);  // type 0
    1012          20 :   GMM_m_s_.push_back(0.01*8.59722); // type 1
    1013          20 :   GMM_m_s_.push_back(0.01*11.1116); // type 2
    1014          20 :   GMM_m_s_.push_back(0.01*15.8952); // type 3
    1015             :   // fill in weight std::vector
    1016          20 :   GMM_m_w_.push_back(2.49982); // type 0
    1017          20 :   GMM_m_w_.push_back(1.97692); // type 1
    1018          20 :   GMM_m_w_.push_back(2.20402); // type 2
    1019          20 :   GMM_m_w_.push_back(5.14099); // type 3
    1020             : 
    1021             :   // check if MOLINFO line is present
    1022          20 :   if( moldat ) {
    1023          20 :     log<<"  MOLINFO DATA found with label " <<moldat->getLabel()<<", using proper atom names\n";
    1024       10876 :     for(unsigned i=0; i<atoms.size(); ++i) {
    1025             :       // get atom name
    1026       10856 :       std::string name = moldat->getAtomName(atoms[i]);
    1027             :       char type;
    1028             :       // get atom type
    1029       10856 :       char first = name.at(0);
    1030             :       // GOLDEN RULE: type is first letter, if not a number
    1031       10856 :       if (!isdigit(first)) {
    1032             :         type = first;
    1033             :         // otherwise is the second
    1034             :       } else {
    1035           0 :         type = name.at(1);
    1036             :       }
    1037             :       // check if key in map
    1038       10856 :       std::string type_s = std::string(1,type);
    1039       10856 :       if(type_map.find(type_s) != type_map.end()) {
    1040             :         // save atom type
    1041       10856 :         GMM_m_type_.push_back(type_map[type_s]);
    1042             :         // this will be normalized in the final density
    1043       10856 :         GMM_m_w.push_back(GMM_m_w_[type_map[type_s]]);
    1044             :       } else {
    1045           0 :         error("Wrong atom type "+type_s+" from atom name "+name+"\n");
    1046             :       }
    1047             :     }
    1048             :   } else {
    1049           0 :     error("MOLINFO DATA not found\n");
    1050             :   }
    1051          20 :   return GMM_m_w;
    1052             : }
    1053             : 
    1054        1962 : void EMMI::check_GMM_d(const VectorGeneric<6> &cov, double w)
    1055             : {
    1056             : 
    1057             : // check if positive defined, by calculating the 3 leading principal minors
    1058        1962 :   double pm1 = cov[0];
    1059        1962 :   double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
    1060        1962 :   double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
    1061             : // apply Sylvester’s criterion
    1062        1962 :   if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0)
    1063           0 :     error("check data GMM: covariance matrix is not positive defined");
    1064             : 
    1065             : // check if weight is positive
    1066        1962 :   if(w<=0) error("check data GMM: weight must be positive");
    1067        1962 : }
    1068             : 
    1069             : // read GMM data file in PLUMED format:
    1070          20 : void EMMI::get_GMM_d(const std::string & GMM_file)
    1071             : {
    1072          20 :   VectorGeneric<6> cov;
    1073             : 
    1074             : // open file
    1075             :   auto ifile=Tools::make_unique<IFile>();
    1076          20 :   if(ifile->FileExist(GMM_file)) {
    1077          20 :     ifile->open(GMM_file);
    1078             :     int idcomp;
    1079        3964 :     while(ifile->scanField("Id",idcomp)) {
    1080             :       int beta;
    1081             :       double w, m0, m1, m2;
    1082        3924 :       ifile->scanField("Weight",w);
    1083        3924 :       ifile->scanField("Mean_0",m0);
    1084        3924 :       ifile->scanField("Mean_1",m1);
    1085        3924 :       ifile->scanField("Mean_2",m2);
    1086        3924 :       ifile->scanField("Cov_00",cov[0]);
    1087        3924 :       ifile->scanField("Cov_01",cov[1]);
    1088        3924 :       ifile->scanField("Cov_02",cov[2]);
    1089        3924 :       ifile->scanField("Cov_11",cov[3]);
    1090        3924 :       ifile->scanField("Cov_12",cov[4]);
    1091        3924 :       ifile->scanField("Cov_22",cov[5]);
    1092        1962 :       ifile->scanField("Beta",beta);
    1093             :       // check input
    1094        1962 :       check_GMM_d(cov, w);
    1095             :       // check beta
    1096        1962 :       if(beta<0) error("Beta must be positive!");
    1097             :       // center of the Gaussian
    1098        1962 :       GMM_d_m_.push_back(Vector(m0,m1,m2));
    1099             :       // covariance matrix
    1100        1962 :       GMM_d_cov_.push_back(cov);
    1101             :       // weight
    1102        1962 :       GMM_d_w_.push_back(w);
    1103             :       // beta
    1104        1962 :       GMM_d_beta_.push_back(beta);
    1105             :       // new line
    1106        1962 :       ifile->scanField();
    1107             :     }
    1108             :   } else {
    1109           0 :     error("Cannot find GMM_FILE "+GMM_file+"\n");
    1110             :   }
    1111             :   // now create a set from beta (unique set of values)
    1112          20 :   std::set<int> bu(GMM_d_beta_.begin(), GMM_d_beta_.end());
    1113             :   // now prepare the group std::vector
    1114          20 :   GMM_d_grps_.resize(bu.size());
    1115             :   // and fill it in
    1116        1982 :   for(unsigned i=0; i<GMM_d_beta_.size(); ++i) {
    1117        1962 :     if(GMM_d_beta_[i]>=GMM_d_grps_.size()) error("Check Beta values");
    1118        1962 :     GMM_d_grps_[GMM_d_beta_[i]].push_back(i);
    1119             :   }
    1120          20 : }
    1121             : 
    1122          20 : void EMMI::calculate_useful_stuff(double reso)
    1123             : {
    1124             :   // We use the following definition for resolution:
    1125             :   // the Fourier transform of the density distribution in real space
    1126             :   // f(s) falls to 1/e of its maximum value at wavenumber 1/resolution
    1127             :   // i.e. from f(s) = A * exp(-B*s**2) -> Res = std::sqrt(B).
    1128             :   // average value of B
    1129             :   double Bave = 0.0;
    1130       10876 :   for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
    1131       10856 :     Bave += GMM_m_s_[GMM_m_type_[i]];
    1132             :   }
    1133          20 :   Bave /= static_cast<double>(GMM_m_type_.size());
    1134             :   // calculate blur factor
    1135             :   double blur = 0.0;
    1136          20 :   if(reso*reso>Bave) blur = reso*reso-Bave;
    1137          36 :   else warning("PLUMED should not be used with maps at resolution better than 0.3 nm");
    1138             :   // add blur to B
    1139         100 :   for(unsigned i=0; i<GMM_m_s_.size(); ++i) GMM_m_s_[i] += blur;
    1140             :   // calculate average resolution
    1141             :   double ave_res = 0.0;
    1142       10876 :   for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
    1143       10856 :     ave_res += std::sqrt(GMM_m_s_[GMM_m_type_[i]]);
    1144             :   }
    1145          20 :   ave_res = ave_res / static_cast<double>(GMM_m_type_.size());
    1146          20 :   log.printf("  experimental map resolution : %3.2f\n", reso);
    1147          20 :   log.printf("  predicted map resolution : %3.2f\n", ave_res);
    1148          20 :   log.printf("  blur factor : %f\n", blur);
    1149             :   // now calculate useful stuff
    1150          20 :   VectorGeneric<6> cov, sum, inv_sum;
    1151             :   // cycle on all atoms types (4 for the moment)
    1152         100 :   for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
    1153             :     // the Gaussian in density (real) space is the FT of scattering factor
    1154             :     // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
    1155          80 :     double s = std::sqrt ( 0.5 * GMM_m_s_[i] ) / pi;
    1156             :     // covariance matrix for spherical Gaussian
    1157          80 :     cov[0]=s*s; cov[1]=0.0; cov[2]=0.0;
    1158          80 :     cov[3]=s*s; cov[4]=0.0;
    1159          80 :     cov[5]=s*s;
    1160             :     // cycle on all data GMM
    1161        7928 :     for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
    1162             :       // we need the sum of the covariance matrices
    1163       54936 :       for(unsigned k=0; k<6; ++k) sum[k] = cov[k] + GMM_d_cov_[j][k];
    1164             :       // and to calculate its determinant
    1165        7848 :       double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
    1166        7848 :       det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
    1167        7848 :       det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
    1168             :       // calculate prefactor - model weights are already normalized
    1169        7848 :       double pre_fact =  cfact_ / std::sqrt(det) * GMM_d_w_[j] * GMM_m_w_[i];
    1170             :       // and its inverse
    1171        7848 :       inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
    1172        7848 :       inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
    1173        7848 :       inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
    1174        7848 :       inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
    1175        7848 :       inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
    1176        7848 :       inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
    1177             :       // now we store the prefactor
    1178        7848 :       pre_fact_.push_back(pre_fact);
    1179             :       // and the inverse of the sum
    1180        7848 :       inv_cov_md_.push_back(inv_sum);
    1181             :     }
    1182             :   }
    1183             :   // tabulate exponential
    1184          20 :   dexp_ = dpcutoff_ / static_cast<double> (nexp_-1);
    1185    20000020 :   for(unsigned i=0; i<nexp_; ++i) {
    1186    20000000 :     tab_exp_.push_back(std::exp(-static_cast<double>(i) * dexp_));
    1187             :   }
    1188          20 : }
    1189             : 
    1190             : // get prefactors
    1191     1621458 : double EMMI::get_prefactor_inverse
    1192             : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
    1193             :  double GMM_w_0, double GMM_w_1,
    1194             :  VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum)
    1195             : {
    1196             : // we need the sum of the covariance matrices
    1197    11350206 :   for(unsigned k=0; k<6; ++k) sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
    1198             : 
    1199             : // and to calculate its determinant
    1200     1621458 :   double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
    1201     1621458 :   det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
    1202     1621458 :   det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
    1203             : 
    1204             : // the prefactor is
    1205     1621458 :   double pre_fact =  cfact_ / std::sqrt(det) * GMM_w_0 * GMM_w_1;
    1206             : 
    1207             : // and its inverse
    1208     1621458 :   inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
    1209     1621458 :   inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
    1210     1621458 :   inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
    1211     1621458 :   inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
    1212     1621458 :   inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
    1213     1621458 :   inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
    1214             : 
    1215             : // return pre-factor
    1216     1621458 :   return pre_fact;
    1217             : }
    1218             : 
    1219        1962 : double EMMI::get_self_overlap(unsigned id)
    1220             : {
    1221             :   double ov_tot = 0.0;
    1222        1962 :   VectorGeneric<6> sum, inv_sum;
    1223        1962 :   Vector ov_der;
    1224             : // start loop
    1225     1623420 :   for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
    1226             :     // call auxiliary method
    1227     1621458 :     double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
    1228     1621458 :                                             GMM_d_w_[id],   GMM_d_w_[i], sum, inv_sum);
    1229             :     // add overlap to ov_tot
    1230     1621458 :     ov_tot += get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
    1231             :   }
    1232             : // and return it
    1233        1962 :   return ov_tot;
    1234             : }
    1235             : 
    1236             : // get overlap and derivatives
    1237     3732816 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
    1238             :                          const VectorGeneric<6> &inv_cov_md, Vector &ov_der)
    1239             : {
    1240     3732816 :   Vector md;
    1241             :   // calculate std::vector difference m_m-d_m with/without pbc
    1242     3732816 :   if(pbc_) md = pbcDistance(d_m, m_m);
    1243     3732816 :   else     md = delta(d_m, m_m);
    1244             :   // calculate product of transpose of md and inv_cov_md
    1245     3732816 :   double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
    1246     3732816 :   double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
    1247     3732816 :   double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
    1248             :   // calculate product of prod and md
    1249     3732816 :   double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
    1250             :   // final calculation
    1251     3732816 :   ov = pre_fact * std::exp(-0.5*ov);
    1252             :   // derivatives
    1253     3732816 :   ov_der = ov * Vector(p_x, p_y, p_z);
    1254     3732816 :   return ov;
    1255             : }
    1256             : 
    1257             : // get the exponent of the overlap
    1258    59085036 : double EMMI::get_exp_overlap(const Vector &m_m, const Vector &d_m,
    1259             :                              const VectorGeneric<6> &inv_cov_md)
    1260             : {
    1261    59085036 :   Vector md;
    1262             :   // calculate std::vector difference m_m-d_m with/without pbc
    1263    59085036 :   if(pbc_) md = pbcDistance(d_m, m_m);
    1264    59085036 :   else     md = delta(d_m, m_m);
    1265             :   // calculate product of transpose of md and inv_cov_md
    1266    59085036 :   double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
    1267    59085036 :   double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
    1268    59085036 :   double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
    1269             :   // calculate product of prod and md
    1270    59085036 :   double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
    1271    59085036 :   return ov;
    1272             : }
    1273             : 
    1274       16355 : void EMMI::update_neighbor_list()
    1275             : {
    1276             :   // dimension of GMM and atom std::vectors
    1277       16355 :   unsigned GMM_d_size = GMM_d_m_.size();
    1278       16355 :   unsigned GMM_m_size = GMM_m_type_.size();
    1279             :   // local neighbor list
    1280             :   std::vector < unsigned > nl_l;
    1281             :   // clear old neighbor list
    1282       16355 :   nl_.clear();
    1283             : 
    1284             :   // cycle on GMM components - in parallel
    1285      116273 :   for(unsigned id=rank_; id<GMM_d_size; id+=size_) {
    1286             :     // overlap lists and map
    1287             :     std::vector<double> ov_l;
    1288             :     std::map<double, unsigned> ov_m;
    1289             :     // total overlap with id
    1290             :     double ov_tot = 0.0;
    1291             :     // cycle on all atoms
    1292    59184954 :     for(unsigned im=0; im<GMM_m_size; ++im) {
    1293             :       // get index in auxiliary lists
    1294    59085036 :       unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
    1295             :       // calculate exponent of overlap
    1296    59085036 :       double expov = get_exp_overlap(GMM_d_m_[id], getPosition(im), inv_cov_md_[kaux]);
    1297             :       // get index of 0.5*expov in tabulated exponential
    1298    59085036 :       unsigned itab = static_cast<unsigned> (round( 0.5*expov/dexp_ ));
    1299             :       // check boundaries and skip atom in case
    1300    59085036 :       if(itab >= tab_exp_.size()) continue;
    1301             :       // in case calculate overlap
    1302     4133388 :       double ov = pre_fact_[kaux] * tab_exp_[itab];
    1303             :       // add to list
    1304     4133388 :       ov_l.push_back(ov);
    1305             :       // and map to retrieve atom index
    1306     4133388 :       ov_m[ov] = im;
    1307             :       // increase ov_tot
    1308     4133388 :       ov_tot += ov;
    1309             :     }
    1310             :     // check if zero size -> ov_tot = 0
    1311       99918 :     if(ov_l.size()==0) continue;
    1312             :     // define cutoff
    1313       99842 :     double ov_cut = ov_tot * nl_cutoff_;
    1314             :     // sort ov_l in ascending order
    1315       99842 :     std::sort(ov_l.begin(), ov_l.end());
    1316             :     // integrate ov_l
    1317             :     double res = 0.0;
    1318     2146102 :     for(unsigned i=0; i<ov_l.size(); ++i) {
    1319     2146102 :       res += ov_l[i];
    1320             :       // if exceeding the cutoff for overlap, stop
    1321     2146102 :       if(res >= ov_cut) break;
    1322             :       else ov_m.erase(ov_l[i]);
    1323             :     }
    1324             :     // now add atoms to neighborlist
    1325     2186970 :     for(std::map<double, unsigned>::iterator it=ov_m.begin(); it!=ov_m.end(); ++it)
    1326     2087128 :       nl_l.push_back(id*GMM_m_size+it->second);
    1327             :     // end cycle on GMM components in parallel
    1328             :   }
    1329             :   // find total dimension of neighborlist
    1330       16355 :   std::vector <int> recvcounts(size_, 0);
    1331       16355 :   recvcounts[rank_] = nl_l.size();
    1332       16355 :   comm.Sum(&recvcounts[0], size_);
    1333             :   int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
    1334             :   // resize neighbor stuff
    1335       16355 :   nl_.resize(tot_size);
    1336             :   // calculate std::vector of displacement
    1337       16355 :   std::vector<int> disp(size_);
    1338       16355 :   disp[0] = 0;
    1339             :   int rank_size = 0;
    1340       27257 :   for(unsigned i=0; i<size_-1; ++i) {
    1341       10902 :     rank_size += recvcounts[i];
    1342       10902 :     disp[i+1] = rank_size;
    1343             :   }
    1344             :   // Allgather neighbor list
    1345       16355 :   comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
    1346             :   // now resize derivatives
    1347       16355 :   ovmd_der_.resize(tot_size);
    1348       16355 : }
    1349             : 
    1350          30 : void EMMI::prepare()
    1351             : {
    1352          30 :   if(getExchangeStep()) first_time_=true;
    1353          30 : }
    1354             : 
    1355             : // overlap calculator
    1356       16365 : void EMMI::calculate_overlap() {
    1357             : 
    1358       16365 :   if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
    1359       16355 :     update_neighbor_list();
    1360       16355 :     first_time_=false;
    1361             :   }
    1362             : 
    1363             :   // clean temporary std::vectors
    1364      174342 :   for(unsigned i=0; i<ovmd_.size(); ++i)     ovmd_[i] = 0.0;
    1365     3168864 :   for(unsigned i=0; i<ovmd_der_.size(); ++i) ovmd_der_[i] = Vector(0,0,0);
    1366             : 
    1367             :   // we have to cycle over all model and data GMM components in the neighbor list
    1368       16365 :   unsigned GMM_d_size = GMM_d_m_.size();
    1369       16365 :   unsigned GMM_m_size = GMM_m_type_.size();
    1370     2127723 :   for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
    1371             :     // get data (id) and atom (im) indexes
    1372     2111358 :     unsigned id = nl_[i] / GMM_m_size;
    1373     2111358 :     unsigned im = nl_[i] % GMM_m_size;
    1374             :     // get index in auxiliary lists
    1375     2111358 :     unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
    1376             :     // add overlap with im component of model GMM
    1377     2111358 :     ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact_[kaux],
    1378     2111358 :                              inv_cov_md_[kaux], ovmd_der_[i]);
    1379             :   }
    1380             :   // communicate stuff
    1381       16365 :   if(size_>1) {
    1382       10902 :     comm.Sum(&ovmd_[0], ovmd_.size());
    1383       10902 :     comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
    1384             :   }
    1385       16365 : }
    1386             : 
    1387           0 : double EMMI::scaleEnergy(double s)
    1388             : {
    1389             :   double ene = 0.0;
    1390           0 :   for(unsigned i=0; i<ovdd_.size(); ++i) {
    1391           0 :     ene += std::log( abs ( s * ovmd_ave_[i] - ovdd_[i] ) );
    1392             :   }
    1393           0 :   return ene;
    1394             : }
    1395             : 
    1396           0 : double EMMI::doRegression()
    1397             : {
    1398             : // standard MC parameters
    1399             :   unsigned MCsteps = 100000;
    1400             :   double kbtmin = 1.0;
    1401             :   double kbtmax = 10.0;
    1402             :   unsigned ncold = 5000;
    1403             :   unsigned nhot = 2000;
    1404             :   double MCacc = 0.0;
    1405             :   double kbt, ebest, scale_best;
    1406             : 
    1407             : // initial value of scale factor and energy
    1408           0 :   double scale = random_.RandU01() * ( scale_max_ - scale_min_ ) + scale_min_;
    1409           0 :   double ene = scaleEnergy(scale);
    1410             : // set best energy
    1411           0 :   ebest = ene;
    1412             : 
    1413             : // MC loop
    1414           0 :   for(unsigned istep=0; istep<MCsteps; ++istep) {
    1415             :     // get temperature
    1416           0 :     if(istep%(ncold+nhot)<ncold) kbt = kbtmin;
    1417             :     else kbt = kbtmax;
    1418             :     // propose move in scale
    1419           0 :     double ds = dscale_ * ( 2.0 * random_.RandU01() - 1.0 );
    1420           0 :     double new_scale = scale + ds;
    1421             :     // check boundaries
    1422           0 :     if(new_scale > scale_max_) {new_scale = 2.0 * scale_max_ - new_scale;}
    1423           0 :     if(new_scale < scale_min_) {new_scale = 2.0 * scale_min_ - new_scale;}
    1424             :     // new energy
    1425           0 :     double new_ene = scaleEnergy(new_scale);
    1426             :     // accept or reject
    1427           0 :     bool accept = doAccept(ene, new_ene, kbt);
    1428             :     // in case of acceptance
    1429           0 :     if(accept) {
    1430             :       scale = new_scale;
    1431             :       ene = new_ene;
    1432           0 :       MCacc += 1.0;
    1433             :     }
    1434             :     // save best
    1435           0 :     if(ene<ebest) {
    1436           0 :       ebest = ene;
    1437           0 :       scale_best = scale;
    1438             :     }
    1439             :   }
    1440             : // calculate acceptance
    1441           0 :   double accscale = MCacc / static_cast<double>(MCsteps);
    1442             : // global communication
    1443           0 :   if(!no_aver_ && nrep_>1) {
    1444           0 :     if(replica_!=0) {
    1445           0 :       scale_best = 0.0;
    1446           0 :       ebest = 0.0;
    1447           0 :       accscale = 0.0;
    1448             :     }
    1449           0 :     if(rank_==0) {
    1450           0 :       multi_sim_comm.Sum(&scale_best, 1);
    1451           0 :       multi_sim_comm.Sum(&ebest, 1);
    1452           0 :       multi_sim_comm.Sum(&accscale, 1);
    1453             :     }
    1454             :   }
    1455             :   // local communication
    1456           0 :   if(rank_!=0) {
    1457           0 :     scale_best = 0.0;
    1458           0 :     ebest = 0.0;
    1459           0 :     accscale = 0.0;
    1460             :   }
    1461           0 :   if(size_>1) {
    1462           0 :     comm.Sum(&scale_best, 1);
    1463           0 :     comm.Sum(&ebest, 1);
    1464           0 :     comm.Sum(&accscale, 1);
    1465             :   }
    1466             : // set scale parameters
    1467           0 :   getPntrToComponent("accscale")->set(accscale);
    1468           0 :   getPntrToComponent("enescale")->set(ebest);
    1469             : // return scale value
    1470           0 :   return scale_best;
    1471             : }
    1472             : 
    1473           0 : double EMMI::get_annealing(long long int step)
    1474             : {
    1475             : // default no annealing
    1476             :   double fact = 1.0;
    1477             : // position in annealing cycle
    1478           0 :   unsigned nc = step%(4*nanneal_);
    1479             : // useful doubles
    1480           0 :   double ncd = static_cast<double>(nc);
    1481           0 :   double nn  = static_cast<double>(nanneal_);
    1482             : // set fact
    1483           0 :   if(nc>=nanneal_   && nc<2*nanneal_) fact = (kanneal_-1.0) / nn * ( ncd - nn ) + 1.0;
    1484           0 :   if(nc>=2*nanneal_ && nc<3*nanneal_) fact = kanneal_;
    1485           0 :   if(nc>=3*nanneal_)                  fact = (1.0-kanneal_) / nn * ( ncd - 3.0*nn) + kanneal_;
    1486           0 :   return fact;
    1487             : }
    1488             : 
    1489       16365 : void EMMI::get_weights(double &weight, double &norm, double &neff)
    1490             : {
    1491       16365 :   const double dnrep = static_cast<double>(nrep_);
    1492             :   // calculate the weights either from BIAS
    1493       16365 :   if(do_reweight_) {
    1494          12 :     std::vector<double> bias(nrep_,0);
    1495          12 :     if(rank_==0) {
    1496          12 :       bias[replica_] = getArgument(0);
    1497          12 :       if(nrep_>1) multi_sim_comm.Sum(&bias[0], nrep_);
    1498             :     }
    1499          12 :     comm.Sum(&bias[0], nrep_);
    1500             : 
    1501             :     // accumulate weights
    1502          12 :     if(!first_time_w_) {
    1503          30 :       for(unsigned i=0; i<nrep_; ++i) {
    1504          20 :         const double delta=bias[i]-average_weights_[i];
    1505             :         // FIXME: multiplying by fractional decay here causes problems with numerical derivatives,
    1506             :         // probably because we're making several calls to calculate(), causing accumulation
    1507             :         // of epsilons. Maybe we can work on a temporary copy of the action instead?
    1508          20 :         average_weights_[i]+=decay_w_*delta;
    1509             :       }
    1510             :     } else {
    1511           2 :       first_time_w_ = false;
    1512           6 :       for(unsigned i=0; i<nrep_; ++i) average_weights_[i] = bias[i];
    1513             :     }
    1514             : 
    1515             :     // set average back into bias and set norm to one
    1516          12 :     const double maxbias = *(std::max_element(average_weights_.begin(), average_weights_.end()));
    1517          36 :     for(unsigned i=0; i<nrep_; ++i) bias[i] = std::exp((average_weights_[i]-maxbias)/kbt_);
    1518             :     // set local weight, norm and weight variance
    1519          12 :     weight = bias[replica_];
    1520             :     double w2=0.;
    1521          36 :     for(unsigned i=0; i<nrep_; ++i) {
    1522          24 :       w2 += bias[i]*bias[i];
    1523          24 :       norm += bias[i];
    1524             :     }
    1525          12 :     neff = norm*norm/w2;
    1526          24 :     getPntrToComponent("weight")->set(weight/norm);
    1527             :   } else {
    1528             :     // or arithmetic ones
    1529       16353 :     neff = dnrep;
    1530       16353 :     weight = 1.0;
    1531       16353 :     norm = dnrep;
    1532             :   }
    1533       16365 :   getPntrToComponent("neff")->set(neff);
    1534       16365 : }
    1535             : 
    1536       16365 : void EMMI::calculate()
    1537             : {
    1538             : 
    1539             : // calculate CV
    1540       16365 :   calculate_overlap();
    1541             : 
    1542             :   // rescale factor for ensemble average
    1543       16365 :   double weight = 0.;
    1544       16365 :   double neff = 0.;
    1545       16365 :   double norm = 0.;
    1546       16365 :   get_weights(weight, norm, neff);
    1547             : 
    1548             :   // in case of ensemble averaging, calculate average overlap
    1549       16365 :   if(!no_aver_ && nrep_>1) {
    1550             :     // if master node, calculate average across replicas
    1551          12 :     if(rank_==0) {
    1552       10812 :       for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] = weight / norm * ovmd_[i];
    1553          12 :       multi_sim_comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
    1554             :     } else {
    1555           0 :       for(unsigned i=0; i<ovmd_ave_.size(); ++i) ovmd_ave_[i] = 0.0;
    1556             :     }
    1557             :     // local communication
    1558          12 :     if(size_>1) comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
    1559             :   } else {
    1560      163530 :     for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] = ovmd_[i];
    1561             :   }
    1562             : 
    1563             :   // get time step
    1564       16365 :   long long int step = getStep();
    1565             : 
    1566             :   // do regression
    1567       16365 :   if(nregres_>0) {
    1568           0 :     if(step%nregres_==0 && !getExchangeStep()) scale_ = doRegression();
    1569             :     // set scale component
    1570           0 :     getPntrToComponent("scale")->set(scale_);
    1571             :   }
    1572             : 
    1573             :   // write model overlap to file
    1574       16365 :   if(ovstride_>0 && step%ovstride_==0) write_model_overlap(step);
    1575             : 
    1576             :   // clear energy and virial
    1577       16365 :   ene_ = 0.0;
    1578       16365 :   virial_.zero();
    1579             : 
    1580             :   // Gaussian noise
    1581       16365 :   if(noise_==0) calculate_Gauss();
    1582             : 
    1583             :   // Outliers noise
    1584       16365 :   if(noise_==1) calculate_Outliers();
    1585             : 
    1586             :   // Marginal noise
    1587       16365 :   if(noise_==2) calculate_Marginal();
    1588             : 
    1589             :   // get annealing rescale factor
    1590       16365 :   if(nanneal_>0) {
    1591           0 :     anneal_ = get_annealing(step);
    1592           0 :     getPntrToComponent("anneal")->set(anneal_);
    1593             :   }
    1594             : 
    1595             :   // annealing rescale
    1596       16365 :   ene_ /= anneal_;
    1597             : 
    1598       16365 :   std::vector<double> GMMid_der_av_(GMMid_der_.size(), 0.0);
    1599             :   // in case of ensemble averaging
    1600       16365 :   if(!no_aver_ && nrep_>1) {
    1601             :     // if master node, sum der_GMMid derivatives and ene
    1602          12 :     if(rank_==0) {
    1603       10812 :       for(unsigned i=0; i<GMMid_der_.size(); ++i) {
    1604       10800 :         GMMid_der_av_[i] = (weight / norm) * GMMid_der_[i];
    1605             :       }
    1606          12 :       multi_sim_comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
    1607          12 :       multi_sim_comm.Sum(&ene_, 1);
    1608             :     } else {
    1609             :       // set der_GMMid derivatives and energy to zero
    1610           0 :       for(unsigned i=0; i<GMMid_der_av_.size(); ++i) GMMid_der_av_[i]=0.0;
    1611           0 :       ene_ = 0.0;
    1612             :     }
    1613             :     // local communication
    1614          12 :     if(size_>1) {
    1615           0 :       comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
    1616           0 :       comm.Sum(&ene_, 1);
    1617             :     }
    1618             :   } else {
    1619      163530 :     for (unsigned i = 0; i < GMMid_der_.size(); ++i) {
    1620      147177 :       GMMid_der_av_[i] = GMMid_der_[i];
    1621             :     }
    1622             :   }
    1623             : 
    1624             :   // clean temporary std::vector
    1625     9860991 :   for(unsigned i=0; i<atom_der_.size(); ++i) atom_der_[i] = Vector(0,0,0);
    1626             : 
    1627             :   // get derivatives of bias with respect to atoms
    1628     2127723 :   for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
    1629             :     // get indexes of data and model component
    1630     2111358 :     unsigned id = nl_[i] / GMM_m_type_.size();
    1631     2111358 :     unsigned im = nl_[i] % GMM_m_type_.size();
    1632             :     // chain rule + replica normalization
    1633     2111358 :     Vector tot_der = GMMid_der_av_[id] * ovmd_der_[i] * scale_ / anneal_;
    1634     2111358 :     Vector pos;
    1635     2111358 :     if(pbc_) pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
    1636     2111358 :     else     pos = getPosition(im);
    1637             :     // increment derivatives and virial
    1638     2111358 :     atom_der_[im] += tot_der;
    1639     2111358 :     virial_ += Tensor(pos, -tot_der);
    1640             :   }
    1641             : 
    1642             :   // communicate local derivatives and virial
    1643       16365 :   if(size_>1) {
    1644       10902 :     comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
    1645       10902 :     comm.Sum(virial_);
    1646             :   }
    1647             : 
    1648             :   // set derivatives, virial, and score
    1649     9860991 :   for(unsigned i=0; i<atom_der_.size(); ++i) setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_[i]);
    1650       16365 :   setBoxDerivatives(getPntrToComponent("scoreb"), virial_);
    1651       16365 :   getPntrToComponent("scoreb")->set(ene_);
    1652             : 
    1653       16365 :   if (do_reweight_) {
    1654             :     double w_tmp = 0.;
    1655       10812 :     for (unsigned i = 0; i < ovmd_.size(); ++i) {
    1656       10800 :       w_tmp += (ovmd_[i] - ovmd_ave_[i]) * GMMid_der_[i];
    1657             :     }
    1658          12 :     w_tmp *= scale_ * (weight / norm) / kbt_ * decay_w_;
    1659             : 
    1660          12 :     setArgDerivatives(getPntrToComponent("scoreb"), w_tmp);
    1661          24 :     getPntrToComponent("biasDer")->set(w_tmp);
    1662             :   }
    1663             : 
    1664             :   // This part is needed only for Gaussian and Outliers noise models
    1665       16365 :   if(noise_!=2) {
    1666             : 
    1667             :     // do Montecarlo
    1668       10914 :     if(dsigma_[0]>0 && step%MCstride_==0 && !getExchangeStep()) doMonteCarlo();
    1669             : 
    1670             :     // print status
    1671       10914 :     if(step%statusstride_==0) print_status(step);
    1672             : 
    1673             :     // calculate acceptance ratio
    1674       10914 :     double acc = MCaccept_ / MCtrials_;
    1675             : 
    1676             :     // set value
    1677       21828 :     getPntrToComponent("acc")->set(acc);
    1678             : 
    1679             :   }
    1680             : 
    1681       16365 : }
    1682             : 
    1683        5463 : void EMMI::calculate_Gauss()
    1684             : {
    1685             :   // cycle on all the GMM groups
    1686       10926 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1687             :     double eneg = 0.0;
    1688             :     // cycle on all the members of the group
    1689       65322 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1690             :       // id of the GMM component
    1691       59859 :       int GMMid = GMM_d_grps_[i][j];
    1692             :       // calculate deviation
    1693       59859 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
    1694             :       // add to group energy
    1695       59859 :       eneg += 0.5 * dev * dev;
    1696             :       // store derivative for later
    1697       59859 :       GMMid_der_[GMMid] = kbt_ * dev / sigma_[i];
    1698             :     }
    1699             :     // add to total energy along with normalizations and prior
    1700        5463 :     ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
    1701             :   }
    1702        5463 : }
    1703             : 
    1704        5451 : void EMMI::calculate_Outliers()
    1705             : {
    1706             :   // cycle on all the GMM groups
    1707       10902 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1708             :     // cycle on all the members of the group
    1709             :     double eneg = 0.0;
    1710       54510 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1711             :       // id of the GMM component
    1712       49059 :       int GMMid = GMM_d_grps_[i][j];
    1713             :       // calculate deviation
    1714       49059 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
    1715             :       // add to group energy
    1716       49059 :       eneg += std::log1p( 0.5 * dev * dev );
    1717             :       // store derivative for later
    1718       49059 :       GMMid_der_[GMMid] = kbt_ / ( 1.0 + 0.5 * dev * dev ) * dev / sigma_[i];
    1719             :     }
    1720             :     // add to total energy along with normalizations and prior
    1721        5451 :     ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
    1722             :   }
    1723        5451 : }
    1724             : 
    1725        5451 : void EMMI::calculate_Marginal()
    1726             : {
    1727             :   // cycle on all the GMM groups
    1728       10902 :   for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
    1729             :     // cycle on all the members of the group
    1730       54510 :     for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
    1731             :       // id of the GMM component
    1732       49059 :       int GMMid = GMM_d_grps_[i][j];
    1733             :       // calculate deviation
    1734       49059 :       double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
    1735             :       // calculate errf
    1736       49059 :       double errf = erf ( dev * inv_sqrt2_ / sigma_min_[i] );
    1737             :       // add to group energy
    1738       49059 :       ene_ += -kbt_ * std::log ( 0.5 / dev * errf ) ;
    1739             :       // store derivative for later
    1740       49059 :       GMMid_der_[GMMid] = - kbt_/errf*sqrt2_pi_*std::exp(-0.5*dev*dev/sigma_min_[i]/sigma_min_[i])/sigma_min_[i]+kbt_/dev;
    1741             :     }
    1742             :   }
    1743        5451 : }
    1744             : 
    1745             : }
    1746             : }

Generated by: LCOV version 1.16