LCOV - code coverage report
Current view: top level - isdb - EMMI.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 310 342 90.6 %
Date: 2020-11-18 11:20:57 Functions: 23 24 95.8 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2017-2019 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : #include "colvar/Colvar.h"
      23             : #include "colvar/ActionRegister.h"
      24             : #include "core/PlumedMain.h"
      25             : #include "tools/Matrix.h"
      26             : #include "core/SetupMolInfo.h"
      27             : #include "core/ActionSet.h"
      28             : #include "tools/File.h"
      29             : 
      30             : #include <string>
      31             : #include <cmath>
      32             : #include <map>
      33             : #include <numeric>
      34             : #include <ctime>
      35             : #include <sstream>
      36             : 
      37             : using namespace std;
      38             : 
      39             : namespace PLMD {
      40             : namespace isdb {
      41             : 
      42             : //+PLUMEDOC ISDB_COLVAR EMMI
      43             : /*
      44             : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
      45             : 
      46             : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in  Ref. \cite Hanot113951 .
      47             : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
      48             : 
      49             : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
      50             : GMM_FILE. We are currently working on a web server to perform
      51             : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
      52             : 
      53             : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
      54             : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an esemble of structures using
      55             : the Metainference approach \cite Bonomi:2016ip .
      56             : 
      57             : \warning
      58             : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
      59             : 
      60             : \note
      61             : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
      62             : In this case, the user should add the NO_AVER flag to the input line.
      63             : 
      64             : \note
      65             : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
      66             : add the NOPBC flag to the input line
      67             : 
      68             : \par Examples
      69             : 
      70             : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
      71             : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
      72             : 
      73             : \verbatim
      74             : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
      75             :      0  2.9993805e+01   6.54628 10.37820 -0.92988  2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02  1
      76             :      1  2.3468312e+01   6.56095 10.34790 -0.87808  1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02  1
      77             :      ...
      78             : \endverbatim
      79             : 
      80             : To accelerate the computation of the Bayesian score, one can:
      81             : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
      82             : - calculate the restraint every other step (or more).
      83             : 
      84             : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
      85             : using a GROMACS index file.
      86             : 
      87             : The input file looks as follows:
      88             : 
      89             : \plumedfile
      90             : # include pdb info
      91             : MOLINFO STRUCTURE=prot.pdb
      92             : 
      93             : #  all heavy atoms
      94             : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
      95             : 
      96             : # create EMMI score
      97             : gmm: EMMI NOPBC SIGMA_MEAN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
      98             : 
      99             : # translate into bias - apply every 2 steps
     100             : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
     101             : 
     102             : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
     103             : \endplumedfile
     104             : 
     105             : 
     106             : */
     107             : //+ENDPLUMEDOC
     108             : 
     109          12 : class EMMI : public Colvar {
     110             : 
     111             : private:
     112             : 
     113             : // temperature in kbt
     114             :   double kbt_;
     115             : // model GMM - weights and atom types
     116             :   vector<double>   GMM_m_w_;
     117             :   vector<unsigned> GMM_m_type_;
     118             : // data GMM - means, weights, and covariances + beta option
     119             :   vector<Vector>             GMM_d_m_;
     120             :   vector<double>             GMM_d_w_;
     121             :   vector< VectorGeneric<6> > GMM_d_cov_;
     122             :   vector<int>                GMM_d_beta_;
     123             : // overlaps
     124             :   vector<double> ovmd_;
     125             :   vector<double> ovdd_;
     126             :   vector<double> ovmd_ave_;
     127             :   double ov_cut_;
     128             :   vector<double> ovdd_cut_;
     129             : // and derivatives
     130             :   vector<Vector> ovmd_der_;
     131             :   vector<Vector> atom_der_;
     132             :   vector<Vector> atom_der_b_;
     133             :   vector<double> err_f_;
     134             : // constant quantities;
     135             :   double cfact_;
     136             :   double inv_sqrt2_, sqrt2_pi_;
     137             : // metainference
     138             :   unsigned nrep_;
     139             :   unsigned replica_;
     140             :   vector<double> sigma_mean_;
     141             : 
     142             : // auxiliary stuff
     143             : // list of atom sigmas
     144             :   vector<double> s_map_;
     145             : // list of prefactors for overlap between two components of model and data GMM
     146             : // fact_md = w_m * w_d / (2pi)**1.5 / sqrt(det_md)
     147             :   vector< double > fact_md_;
     148             : // inverse of the sum of model and data covariances matrices
     149             :   vector< VectorGeneric<6> > inv_cov_md_;
     150             : // neighbor list
     151             :   double   nl_cutoff_;
     152             :   unsigned nl_stride_;
     153             :   bool first_time_, no_aver_;
     154             :   vector < unsigned > nl_;
     155             : // parallel stuff
     156             :   unsigned size_;
     157             :   unsigned rank_;
     158             : 
     159             : // analysis mode
     160             :   bool analysis_;
     161             :   OFile Devfile_;
     162             :   double nframe_;
     163             : 
     164             : // pbc
     165             :   bool pbc_;
     166             : 
     167             : // calculate model GMM weights and covariances - these are constants
     168             :   void get_GMM_m(vector<AtomNumber> &atoms);
     169             : // read data GMM file
     170             :   void get_GMM_d(string gmm_file);
     171             : // normalize GMM
     172             :   void normalize_GMM(vector<double> &w);
     173             : // check GMM data
     174             :   void check_GMM_d(VectorGeneric<6> &cov, double w);
     175             : 
     176             : // get auxiliary stuff
     177             :   void get_auxiliary_stuff();
     178             : // get cutoff in overlap
     179             :   void get_cutoff_ov();
     180             : // get fact_md and inv_cov_md
     181             :   double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
     182             :                                 double &GMM_w_0, double &GMM_w_1,
     183             :                                 VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
     184             : // calculate self overlaps between data GMM components - ovdd_
     185             :   double get_self_overlap(unsigned id);
     186             : // calculate overlap between two components
     187             :   double get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
     188             :                      const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
     189             :   double get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
     190             :                      const VectorGeneric<6> &inv_cov_md);
     191             : // update the neighbor list
     192             :   void update_neighbor_list();
     193             : // calculate overlap
     194             :   void calculate_overlap();
     195             : 
     196             : public:
     197             :   static void registerKeywords( Keywords& keys );
     198             :   explicit EMMI(const ActionOptions&);
     199             : // active methods:
     200             :   void prepare();
     201             :   virtual void calculate();
     202             : };
     203             : 
     204        6456 : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
     205             : 
     206           5 : void EMMI::registerKeywords( Keywords& keys ) {
     207           5 :   Colvar::registerKeywords( keys );
     208          20 :   keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
     209          20 :   keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
     210          20 :   keys.add("compulsory","TEMP","temperature");
     211          15 :   keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
     212          15 :   keys.addFlag("ANALYSIS",false,"run in analysis mode");
     213          20 :   keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
     214          20 :   keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
     215          20 :   keys.add("compulsory","SIGMA_MEAN","starting value for the uncertainty in the mean estimate");
     216           5 :   componentsAreNotOptional(keys);
     217          20 :   keys.addOutputComponent("score",   "default","Bayesian score");
     218          20 :   keys.addOutputComponent("scoreb",  "default","Beta Bayesian score");
     219           5 : }
     220             : 
     221           4 : EMMI::EMMI(const ActionOptions&ao):
     222             :   PLUMED_COLVAR_INIT(ao),
     223             :   inv_sqrt2_(0.707106781186548),
     224             :   sqrt2_pi_(0.797884560802865),
     225             :   nl_cutoff_(-1.0), nl_stride_(0),
     226             :   first_time_(true), no_aver_(false),
     227          40 :   analysis_(false), nframe_(0.0), pbc_(true)
     228             : {
     229             : 
     230           4 :   bool nopbc=!pbc_;
     231           8 :   parseFlag("NOPBC",nopbc);
     232           4 :   pbc_=!nopbc;
     233             : 
     234             :   vector<AtomNumber> atoms;
     235           8 :   parseAtomList("ATOMS",atoms);
     236             : 
     237             :   string GMM_file;
     238           8 :   parse("GMM_FILE",GMM_file);
     239             : 
     240             :   // uncertainty stuff
     241             :   double sigma_mean;
     242           8 :   parse("SIGMA_MEAN",sigma_mean);
     243             : 
     244             :   // temperature
     245           4 :   double temp=0.0;
     246           8 :   parse("TEMP",temp);
     247             :   // convert temp to kbt
     248           8 :   if(temp>0.0) kbt_=plumed.getAtoms().getKBoltzmann()*temp;
     249           0 :   else kbt_=plumed.getAtoms().getKbT();
     250             : 
     251             :   // neighbor list stuff
     252           8 :   parse("NL_CUTOFF",nl_cutoff_);
     253           4 :   if(nl_cutoff_<=0.0) error("NL_CUTOFF should be explicitly specified and positive");
     254           8 :   parse("NL_STRIDE",nl_stride_);
     255           4 :   if(nl_stride_<=0) error("NL_STRIDE should be explicitly specified and positive");
     256             : 
     257           8 :   parseFlag("NO_AVER",no_aver_);
     258           8 :   parseFlag("ANALYSIS",analysis_);
     259             : 
     260           4 :   checkRead();
     261             : 
     262             :   // set parallel stuff
     263           4 :   size_=comm.Get_size();
     264           4 :   rank_=comm.Get_rank();
     265             : 
     266             :   // get number of replicas
     267           4 :   if(rank_==0) {
     268           3 :     if(no_aver_) {
     269           3 :       nrep_ = 1;
     270           3 :       replica_ = 0;
     271             :     } else {
     272           0 :       nrep_ = multi_sim_comm.Get_size();
     273           0 :       replica_ = multi_sim_comm.Get_rank();
     274             :     }
     275             :   } else {
     276           1 :     nrep_ = 0;
     277           1 :     replica_ = 0;
     278             :   }
     279           4 :   comm.Sum(&nrep_,1);
     280           4 :   comm.Sum(&replica_,1);
     281             : 
     282           4 :   log.printf("  atoms involved : ");
     283        7232 :   for(unsigned i=0; i<atoms.size(); ++i) log.printf("%d ",atoms[i].serial());
     284           4 :   log.printf("\n");
     285           8 :   log.printf("  GMM data file : %s\n", GMM_file.c_str());
     286           4 :   if(no_aver_) log.printf("  without ensemble averaging\n");
     287           4 :   log.printf("  neighbor list overlap cutoff : %lf\n", nl_cutoff_);
     288           4 :   log.printf("  neighbor list stride : %u\n",  nl_stride_);
     289           4 :   log.printf("  uncertainty in the mean estimate %f\n",sigma_mean);
     290           4 :   log.printf("  temperature of the system in energy unit %f\n",kbt_);
     291           4 :   log.printf("  number of replicas %u\n",nrep_);
     292             : 
     293             : 
     294             :   // set constant quantity before calculating stuff
     295           4 :   cfact_ = 1.0/pow( 2.0*pi, 1.5 );
     296             : 
     297             :   // calculate model GMM constant parameters
     298           4 :   get_GMM_m(atoms);
     299             : 
     300             :   // read data GMM parameters
     301           8 :   get_GMM_d(GMM_file);
     302           8 :   log.printf("  number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
     303             : 
     304             :   // normalize GMMs
     305           4 :   normalize_GMM(GMM_m_w_);
     306           4 :   normalize_GMM(GMM_d_w_);
     307             : 
     308             :   // get self overlaps between data GMM components
     309        4712 :   for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
     310        1568 :     double ov = get_self_overlap(i);
     311        1568 :     ovdd_.push_back(ov);
     312        3136 :     sigma_mean_.push_back(sigma_mean*ov);
     313             :   }
     314             : 
     315             :   // calculate auxiliary stuff
     316           4 :   get_auxiliary_stuff();
     317             : 
     318             :   // get cutoff for overlap calculation - avoid millions of exp calculations
     319           4 :   get_cutoff_ov();
     320             : 
     321             :   // and prepare temporary vectors
     322           4 :   ovmd_.resize(GMM_d_w_.size());
     323           4 :   err_f_.resize(GMM_d_w_.size());
     324           4 :   atom_der_.resize(GMM_m_w_.size());
     325           4 :   atom_der_b_.resize(GMM_m_w_.size());
     326             : 
     327             :   // clear things that are not needed anymore
     328             :   GMM_d_cov_.clear();
     329             : 
     330             :   // add components
     331          12 :   addComponentWithDerivatives("score");  componentIsNotPeriodic("score");
     332          12 :   addComponentWithDerivatives("scoreb"); componentIsNotPeriodic("scoreb");
     333             : 
     334             :   // request the atoms
     335           4 :   requestAtoms(atoms);
     336             : 
     337          12 :   log<<"  Bibliography "<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
     338          12 :   log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
     339          12 :   log<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
     340           4 :   log<<"\n";
     341           4 : }
     342             : 
     343           4 : void EMMI::get_GMM_m(vector<AtomNumber> &atoms)
     344             : {
     345           8 :   vector<SetupMolInfo*> moldat=plumed.getActionSet().select<SetupMolInfo*>();
     346             : 
     347             :   // map of atom types to A and B coefficients of scattering factor
     348             :   // f(s) = A * exp(-B*s**2)
     349             :   // B is in Angstrom squared
     350             :   map<string, double> w_map;
     351           8 :   w_map["C"] = 2.49982; // type 0
     352           8 :   w_map["O"] = 1.97692;  // type 1
     353           8 :   w_map["N"] = 2.20402; // type 2
     354           8 :   w_map["S"] = 5.14099;  // type 3
     355             :   // map between an atom type and an index
     356             :   map<string, unsigned> type_map;
     357           8 :   type_map["C"]=0;
     358           8 :   type_map["O"]=1;
     359           8 :   type_map["N"]=2;
     360           8 :   type_map["S"]=3;
     361             :   // fill in the sigma vector
     362           8 :   s_map_.push_back(15.146);
     363           8 :   s_map_.push_back(8.59722);
     364           8 :   s_map_.push_back(11.1116);
     365           8 :   s_map_.push_back(15.8952);
     366             : 
     367             :   // check if MOLINFO line is present
     368           4 :   if( moldat.size()==1 ) {
     369           4 :     log<<"  MOLINFO DATA found, using proper atom names\n";
     370        7232 :     for(unsigned i=0; i<atoms.size(); ++i) {
     371             :       // get atom name
     372        4816 :       string name = moldat[0]->getAtomName(atoms[i]);
     373             :       char type;
     374             :       // get atom type
     375        2408 :       char first = name.at(0);
     376             :       // GOLDEN RULE: type is first letter, if not a number
     377        2408 :       if (!isdigit(first)) {
     378             :         type = first;
     379             :         // otherwise is the second
     380             :       } else {
     381           0 :         type = name.at(1);
     382             :       }
     383             :       // check if key in map
     384        2408 :       std::string type_s = std::string(1,type);
     385        2408 :       if(w_map.find(type_s) != w_map.end()) {
     386             :         // save atom type
     387        2408 :         GMM_m_type_.push_back(type_map[type_s]);
     388             :         // this will be normalized to 1 in the final density
     389        2408 :         GMM_m_w_.push_back(w_map[type_s]);
     390             :       } else {
     391           0 :         error("Wrong atom type "+type_s+" from atom name "+name+"\n");
     392             :       }
     393             :     }
     394             :   } else {
     395           0 :     error("MOLINFO DATA not found\n");
     396             :   }
     397           4 : }
     398             : 
     399             : 
     400        1568 : void EMMI::check_GMM_d(VectorGeneric<6> &cov, double w)
     401             : {
     402             : 
     403             : // check if positive defined, by calculating the 3 leading principal minors
     404        1568 :   double pm1 = cov[0];
     405        1568 :   double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
     406        1568 :   double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
     407             : // apply Sylvester’s criterion
     408        1568 :   if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0)
     409           0 :     error("check data GMM: covariance matrix is not positive defined");
     410             : 
     411             : // check if weight is positive
     412        1568 :   if(w<0.0) error("check data GMM: weight must be positive");
     413        1568 : }
     414             : 
     415             : 
     416             : // read GMM data file in PLUMED format:
     417           4 : void EMMI::get_GMM_d(string GMM_file)
     418             : {
     419           4 :   VectorGeneric<6> cov;
     420             : 
     421             : // open file
     422           4 :   IFile *ifile = new IFile();
     423           4 :   if(ifile->FileExist(GMM_file)) {
     424           4 :     ifile->open(GMM_file);
     425             :     int idcomp;
     426        4712 :     while(ifile->scanField("Id",idcomp)) {
     427             :       int beta;
     428             :       double w, m0, m1, m2;
     429        3136 :       ifile->scanField("Weight",w);
     430        3136 :       ifile->scanField("Mean_0",m0);
     431        3136 :       ifile->scanField("Mean_1",m1);
     432        3136 :       ifile->scanField("Mean_2",m2);
     433        3136 :       ifile->scanField("Cov_00",cov[0]);
     434        3136 :       ifile->scanField("Cov_01",cov[1]);
     435        3136 :       ifile->scanField("Cov_02",cov[2]);
     436        3136 :       ifile->scanField("Cov_11",cov[3]);
     437        3136 :       ifile->scanField("Cov_12",cov[4]);
     438        3136 :       ifile->scanField("Cov_22",cov[5]);
     439        3136 :       ifile->scanField("Beta",beta);
     440             :       // check input
     441        1568 :       check_GMM_d(cov, w);
     442             :       // center of the Gaussian
     443        3136 :       GMM_d_m_.push_back(Vector(m0,m1,m2));
     444             :       // covariance matrix
     445        1568 :       GMM_d_cov_.push_back(cov);
     446             :       // weights
     447        1568 :       GMM_d_w_.push_back(w);
     448             :       // beta
     449        1568 :       GMM_d_beta_.push_back(beta);
     450             :       // new line
     451        1568 :       ifile->scanField();
     452             :     }
     453           4 :     ifile->close();
     454             :   } else {
     455           0 :     error("Cannot find GMM_FILE "+GMM_file+"\n");
     456             :   }
     457           4 :   delete ifile;
     458             : 
     459           4 : }
     460             : 
     461             : // normalize GMM to sum to 1
     462             : // since all the GMM components are individually normalized, we just need to
     463             : // divide each weight for the sum of the weights
     464           8 : void EMMI::normalize_GMM(vector<double> &w)
     465             : {
     466             :   double norm = accumulate(w.begin(), w.end(), 0.0);
     467       11944 :   for(unsigned i=0; i<w.size(); ++i) w[i] /= norm;
     468           8 : }
     469             : 
     470           4 : void EMMI::get_auxiliary_stuff()
     471             : {
     472           4 :   VectorGeneric<6> cov, sum, inv_sum;
     473             :   // cycle on all atoms types
     474          36 :   for(unsigned i=0; i<4; ++i) {
     475             :     // the Gaussian in density (real) space is the FT of scattering factor
     476             :     // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
     477          32 :     double s = sqrt ( 0.5 * s_map_[i] ) / pi * 0.1;
     478             :     // covariance matrix for spherical Gaussian
     479          16 :     cov[0]=s*s; cov[1]=0.0; cov[2]=0.0;
     480          16 :     cov[3]=s*s; cov[4]=0.0;
     481          16 :     cov[5]=s*s;
     482             :     // cycle on all data GMM
     483       18848 :     for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
     484             :       // we need the sum of the covariance matrices
     485       81536 :       for(unsigned k=0; k<6; ++k) sum[k] = cov[k] + GMM_d_cov_[j][k];
     486             :       // and to calculate its determinant
     487        6272 :       double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
     488        6272 :       det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
     489        6272 :       det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
     490             :       // the constant part of the prefactor is
     491        6272 :       double pre_fact =  cfact_ / sqrt(det);
     492             :       // and its inverse
     493        6272 :       inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
     494        6272 :       inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
     495        6272 :       inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
     496        6272 :       inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
     497        6272 :       inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
     498        6272 :       inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
     499             :       // now we store the pre_fact
     500        6272 :       fact_md_.push_back(pre_fact);
     501             :       // and the inverse of the sum
     502        6272 :       inv_cov_md_.push_back(inv_sum);
     503             :     }
     504             :   }
     505             : 
     506           4 : }
     507             : 
     508             : // get prefactors
     509      614656 : double EMMI::get_prefactor_inverse
     510             : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
     511             :  double &GMM_w_0, double &GMM_w_1,
     512             :  VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum)
     513             : {
     514             : // we need the sum of the covariance matrices
     515     4302592 :   for(unsigned k=0; k<6; ++k) sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
     516             : 
     517             : // and to calculate its determinant
     518      614656 :   double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
     519      614656 :   det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
     520      614656 :   det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
     521             : 
     522             : // the prefactor is
     523      614656 :   double pre_fact =  cfact_ / sqrt(det) * GMM_w_0 * GMM_w_1;
     524             : 
     525             : // and its inverse
     526      614656 :   inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
     527      614656 :   inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
     528      614656 :   inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
     529      614656 :   inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
     530      614656 :   inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
     531      614656 :   inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
     532             : 
     533             : // return pre-factor
     534      614656 :   return pre_fact;
     535             : }
     536             : 
     537        1568 : double EMMI::get_self_overlap(unsigned id)
     538             : {
     539             :   vector<double> ov;
     540        1568 :   VectorGeneric<6> sum, inv_sum;
     541        1568 :   Vector ov_der;
     542             : // start loop
     543     1847104 :   for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
     544             :     // call auxiliary method
     545      614656 :     double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
     546     1229312 :                                             GMM_d_w_[id],   GMM_d_w_[i], sum, inv_sum);
     547             :     // calculate overlap
     548      614656 :     double ov_tmp = get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
     549             :     // add to list
     550      614656 :     ov.push_back(ov_tmp);
     551             :   }
     552             : // calculate total
     553             :   double ov_tot = accumulate(ov.begin(), ov.end(), 0.0);
     554             : // sort in ascending order
     555             :   std::sort(ov.begin(), ov.end());
     556             : // get cutoff = nl_cutoff_ * ov_tot
     557        1568 :   double ov_cut = ov_tot * nl_cutoff_;
     558             : // integrate tail of ov
     559             :   double ov_sum = 0.0;
     560     1769872 :   for(unsigned i=1; i<ov.size(); ++i) {
     561      590480 :     ov_sum += ov[i];
     562      590480 :     if(ov_sum >= ov_cut) {
     563        3136 :       ov_cut = ov[i-1];
     564        1568 :       break;
     565             :     }
     566             :   }
     567             : // store
     568        1568 :   ovdd_cut_.push_back(ov_cut);
     569             : // and return it
     570        1568 :   return ov_tot;
     571             : }
     572             : 
     573             : // this is to avoid the calculation of millions of exp function
     574             : // when updating the neighbor list using calculate_overlap
     575           4 : void EMMI::get_cutoff_ov()
     576             : {
     577             :   // temporary stuff
     578           4 :   unsigned GMM_d_w_size = GMM_d_w_.size();
     579             :   // set ov_cut_ to a huge number
     580           4 :   ov_cut_ = 1.0+9;
     581             :   // calculate minimum value needed for cutoff
     582        3140 :   for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
     583     2834944 :     for(unsigned j=0; j<GMM_m_w_.size(); ++j) {
     584             :       // get atom type
     585      943936 :       unsigned jtype = GMM_m_type_[j];
     586             :       // get index in auxiliary lists
     587      943936 :       unsigned kaux = jtype * GMM_d_w_size + i;
     588             :       // get prefactor and multiply by weights
     589     3775744 :       double pre_fact = fact_md_[kaux] * GMM_d_w_[i] * GMM_m_w_[j];
     590             :       // calculate ov
     591      943936 :       double ov = ovdd_cut_[i] / pre_fact;
     592             :       // check
     593      943936 :       if(ov < ov_cut_) ov_cut_ = ov;
     594             :     }
     595             :   }
     596             :   // set cutoff
     597           4 :   ov_cut_ = -2.0 * std::log(ov_cut_);
     598           4 : }
     599             : 
     600             : // version with derivatives
     601    10788184 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
     602             :                          const VectorGeneric<6> &inv_cov_md, Vector &ov_der)
     603             : {
     604    10788184 :   Vector md;
     605             :   // calculate vector difference m_m-d_m with/without pbc
     606    10788184 :   if(pbc_) md = pbcDistance(d_m, m_m);
     607    10788184 :   else     md = delta(d_m, m_m);
     608             :   // calculate product of transpose of md and inv_cov_md
     609    10788184 :   double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
     610    10788184 :   double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
     611    10788184 :   double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
     612             :   // calculate product of prod and md
     613    10788184 :   double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
     614             :   // final calculation
     615    10788184 :   ov = fact_md * exp(-0.5*ov);
     616             :   // derivatives
     617    10788184 :   ov_der = ov * Vector(p_x, p_y, p_z);
     618    10788184 :   return ov;
     619             : }
     620             : 
     621             : // fast version without derivatives and cutoff used for neighbor list
     622   429018912 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
     623             :                          const VectorGeneric<6> &inv_cov_md)
     624             : 
     625             : {
     626   429018912 :   Vector md;
     627             :   // calculate vector difference m_m-d_m with/without pbc
     628   429018912 :   if(pbc_) md = pbcDistance(d_m, m_m);
     629   429018912 :   else     md = delta(d_m, m_m);
     630             :   // calculate product of transpose of md and inv_cov_md
     631   429018912 :   double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
     632   429018912 :   double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
     633   429018912 :   double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
     634             :   // calculate product of prod and md
     635   429018912 :   double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
     636             :   // final calculation
     637   429018912 :   if( ov > ov_cut_ ) {
     638             :     ov = 0.0;
     639             :   } else {
     640    24333930 :     ov = fact_md * exp(-0.5*ov);
     641             :   }
     642   429018912 :   return ov;
     643             : }
     644             : 
     645        1819 : void EMMI::update_neighbor_list()
     646             : {
     647             :   // temp stuff
     648        1819 :   unsigned GMM_d_w_size = GMM_d_w_.size();
     649        1819 :   unsigned GMM_m_w_size = GMM_m_w_.size();
     650             :   // local neighbor list
     651             :   vector < unsigned > nl_l;
     652             :   // clear old neighbor list
     653        1819 :   nl_.clear();
     654             :   // cycle on all overlaps (in parallel)
     655        1819 :   unsigned nover = GMM_d_w_size * GMM_m_w_size;
     656   429020731 :   for(unsigned k=rank_; k<nover; k=k+size_) {
     657             :     // get indexes
     658   429018912 :     unsigned i = k / GMM_m_w_size;
     659   429018912 :     unsigned j = k % GMM_m_w_size;
     660             :     // get atom type
     661   858037824 :     unsigned jtype = GMM_m_type_[j];
     662             :     // get index in auxiliary lists
     663   429018912 :     unsigned kaux = jtype * GMM_d_w_size + i;
     664             :     // get prefactor and multiply by weights
     665  1716075648 :     double pre_fact = fact_md_[kaux] * GMM_d_w_[i] * GMM_m_w_[j];
     666             :     // calculate overlap
     667   858037824 :     double ov = get_overlap(GMM_d_m_[i], getPosition(j), pre_fact, inv_cov_md_[kaux]);
     668             :     // fill the neighbor list
     669   429018912 :     if(ov >= ovdd_cut_[i]) nl_l.push_back(k);
     670             :   }
     671             :   // find total dimension of neighborlist
     672        1819 :   vector <int> recvcounts(size_, 0);
     673        3638 :   recvcounts[rank_] = nl_l.size();
     674        3638 :   comm.Sum(&recvcounts[0], size_);
     675             :   int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
     676             :   // resize neighbor stuff
     677        1819 :   nl_.resize(tot_size);
     678             :   // calculate vector of displacement
     679        1819 :   vector<int> disp(size_);
     680        1819 :   disp[0] = 0;
     681             :   int rank_size = 0;
     682        1823 :   for(unsigned i=0; i<size_-1; ++i) {
     683           4 :     rank_size += recvcounts[i];
     684           4 :     disp[i+1] = rank_size;
     685             :   }
     686             :   // Allgather neighbor list
     687        7276 :   comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
     688             :   // now resize derivatives
     689        1819 :   ovmd_der_.resize(tot_size);
     690        1819 : }
     691             : 
     692           4 : void EMMI::prepare()
     693             : {
     694           4 :   if(getExchangeStep()) first_time_=true;
     695           4 : }
     696             : 
     697             : 
     698             : // overlap calculator
     699        1819 : void EMMI::calculate_overlap() {
     700             : 
     701        1819 :   if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
     702        1819 :     update_neighbor_list();
     703        1819 :     first_time_=false;
     704             :   }
     705             : 
     706             :   // clean temporary vectors
     707     2142782 :   for(unsigned i=0; i<ovmd_.size(); ++i)     ovmd_[i] = 0.0;
     708    30541010 :   for(unsigned i=0; i<ovmd_der_.size(); ++i) ovmd_der_[i] = Vector(0,0,0);
     709             : 
     710             :   // we have to cycle over all model and data GMM components in the neighbor list
     711        1819 :   unsigned GMM_d_w_size = GMM_d_w_.size();
     712        1819 :   unsigned GMM_m_w_size = GMM_m_w_.size();
     713    20350694 :   for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
     714             :     // get indexes of data and model component
     715    10173528 :     unsigned id = nl_[i] / GMM_m_w_size;
     716    10173528 :     unsigned im = nl_[i] % GMM_m_w_size;
     717             :     // get atom type
     718    20347056 :     unsigned jtype = GMM_m_type_[im];
     719             :     // get index in auxiliary lists
     720    10173528 :     unsigned kaux = jtype * GMM_d_w_size + id;
     721             :     // get prefactor and multiply by weights
     722    40694112 :     double pre_fact = fact_md_[kaux] * GMM_d_w_[id] * GMM_m_w_[im];
     723             :     // add overlap with im component of model GMM
     724    30520584 :     ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact,
     725             :                              inv_cov_md_[kaux], ovmd_der_[i]);
     726             :   }
     727             :   // communicate stuff
     728        3638 :   comm.Sum(&ovmd_[0], ovmd_.size());
     729        3638 :   comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
     730        1819 : }
     731             : 
     732             : 
     733        1819 : void EMMI::calculate() {
     734             : 
     735             : // calculate CV
     736        1819 :   calculate_overlap();
     737             : 
     738        1819 :   if(!analysis_) {
     739             : 
     740             :     // BIASING MODE
     741             :     // rescale factor for ensemble average
     742        1819 :     double escale = 1.0 / static_cast<double>(nrep_);
     743             : 
     744             :     // calculate average of ovmd_ across replicas
     745        1819 :     if(!no_aver_) {
     746           0 :       if(comm.Get_rank()==0) {
     747           0 :         multi_sim_comm.Sum(&ovmd_[0], ovmd_.size());
     748           0 :         for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] *= escale;
     749             :       } else {
     750           0 :         for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i]  = 0.0;
     751             :       }
     752           0 :       comm.Sum(&ovmd_[0], ovmd_.size());
     753             :     }
     754             : 
     755             :     // calculate score and scoreb
     756             :     double ene = 0.0;
     757             :     double ene_b = 0.0;
     758     2142782 :     for(unsigned i=0; i<ovmd_.size(); ++i) {
     759             :       // calculate and store err function
     760     2852192 :       err_f_[i] = erf ( ( ovmd_[i]-ovdd_[i] ) * inv_sqrt2_ / sigma_mean_[i] );
     761             :       // energy term
     762     2852192 :       double ene_tmp = -kbt_ * std::log ( 0.5 / (ovmd_[i]-ovdd_[i]) * err_f_[i]) ;
     763             :       // increment energy
     764      713048 :       if(GMM_d_beta_[i] == 1) ene_b += ene_tmp;
     765           0 :       else                    ene   += ene_tmp;
     766             :     }
     767             : 
     768             :     // multiply by number of replicas
     769        1819 :     ene   /= escale;
     770        1819 :     ene_b /= escale;
     771             : 
     772             :     // clear temporary vector
     773     3288752 :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     774     1095038 :       atom_der_[i]   = Vector(0,0,0);
     775     1095038 :       atom_der_b_[i] = Vector(0,0,0);
     776             :     }
     777             :     // virial
     778        1819 :     Tensor virial, virialb;
     779             : 
     780             :     // get derivatives of bias with respect to atoms
     781    20350694 :     for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
     782             :       // get indexes of data and model component
     783    20347056 :       unsigned id = nl_[i] / GMM_m_w_.size();
     784    10173528 :       unsigned im = nl_[i] % GMM_m_w_.size();
     785             :       // first part of derivative
     786    61041168 :       double der = - kbt_/err_f_[id]*sqrt2_pi_*exp(-0.5*(ovmd_[id]-ovdd_[id])*(ovmd_[id]-ovdd_[id])/sigma_mean_[id]/sigma_mean_[id])/sigma_mean_[id];
     787             :       // second part
     788    30520584 :       der += kbt_ / (ovmd_[id]-ovdd_[id]);
     789             :       // chain rule
     790    10173528 :       Vector tot_der = der * ovmd_der_[i];
     791             :       // atom's position in GMM cell
     792    10173528 :       Vector pos;
     793    10173528 :       if(pbc_) pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
     794    20347056 :       else     pos = getPosition(im);
     795             :       // add derivative and virial
     796    10173528 :       if(GMM_d_beta_[id] == 1) {
     797    10173528 :         atom_der_b_[im] += tot_der;
     798    10173528 :         virialb         += Tensor(pos, -tot_der);
     799             :       } else {
     800           0 :         atom_der_[im] += tot_der;
     801           0 :         virial        += Tensor(pos, -tot_der);
     802             :       }
     803             :     }
     804             : 
     805             :     // communicate stuff
     806        3638 :     comm.Sum(&atom_der_[0][0],   3*atom_der_.size());
     807        3638 :     comm.Sum(&atom_der_b_[0][0], 3*atom_der_b_.size());
     808        1819 :     comm.Sum(virial);
     809        1819 :     comm.Sum(virialb);
     810             : 
     811             :     // set derivatives
     812     3288752 :     for(unsigned i=0; i<atom_der_.size(); ++i) {
     813     3285114 :       setAtomsDerivatives(getPntrToComponent("score"),  i, atom_der_[i]);
     814     2190076 :       setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_b_[i]);
     815             :     }
     816             : 
     817             :     // and set virial
     818        3638 :     setBoxDerivatives(getPntrToComponent("score"),  virial);
     819        3638 :     setBoxDerivatives(getPntrToComponent("scoreb"), virialb);
     820             : 
     821             :     // set value of the score
     822        3638 :     getPntrToComponent("score")->set(ene);
     823             :     // set value of the beta score
     824        3638 :     getPntrToComponent("scoreb")->set(ene_b);
     825             : 
     826             :   } else {
     827             : 
     828             :     // ANALYSIS MODE
     829             :     // prepare stuff for the first time
     830           0 :     if(nframe_ <= 0.0) {
     831           0 :       Devfile_.link(*this);
     832           0 :       Devfile_.open("ovmd_deviations.dat");
     833             :       Devfile_.setHeavyFlush();
     834           0 :       Devfile_.fmtField("%12.6f");
     835           0 :       ovmd_ave_.resize(GMM_d_w_.size());
     836           0 :       for(unsigned i=0; i<ovmd_ave_.size(); ++i) ovmd_ave_[i] = 0.0;
     837             :     }
     838             : 
     839             :     // increment number of frames
     840           0 :     nframe_ += 1.0;
     841             : 
     842             :     // add average ovmd_
     843           0 :     for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] += ovmd_[i];
     844             : 
     845             :     // print stuff
     846           0 :     for(unsigned i=0; i<ovmd_.size(); ++i) {
     847             :       // convert i to string
     848           0 :       stringstream ss;
     849             :       ss << i;
     850             :       // labels
     851           0 :       string label = "ovmd_" + ss.str();
     852             :       // print entry
     853           0 :       double ave = ovmd_ave_[i] / nframe_;
     854           0 :       double dev2 = (ave-ovdd_[i])*(ave-ovdd_[i])/ovdd_[i]/ovdd_[i];
     855           0 :       double dev = sqrt(dev2);
     856           0 :       Devfile_.printField(label, dev);
     857             :     }
     858           0 :     Devfile_.printField();
     859             :   }
     860             : 
     861        1819 : }
     862             : 
     863             : }
     864        4839 : }

Generated by: LCOV version 1.13