Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2017-2019 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "colvar/Colvar.h"
23 : #include "colvar/ActionRegister.h"
24 : #include "core/PlumedMain.h"
25 : #include "tools/Matrix.h"
26 : #include "core/SetupMolInfo.h"
27 : #include "core/ActionSet.h"
28 : #include "tools/File.h"
29 :
30 : #include <string>
31 : #include <cmath>
32 : #include <map>
33 : #include <numeric>
34 : #include <ctime>
35 : #include <sstream>
36 :
37 : using namespace std;
38 :
39 : namespace PLMD {
40 : namespace isdb {
41 :
42 : //+PLUMEDOC ISDB_COLVAR EMMI
43 : /*
44 : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
45 :
46 : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in Ref. \cite Hanot113951 .
47 : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
48 :
49 : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
50 : GMM_FILE. We are currently working on a web server to perform
51 : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
52 :
53 : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
54 : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an esemble of structures using
55 : the Metainference approach \cite Bonomi:2016ip .
56 :
57 : \warning
58 : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
59 :
60 : \note
61 : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
62 : In this case, the user should add the NO_AVER flag to the input line.
63 :
64 : \note
65 : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
66 : add the NOPBC flag to the input line
67 :
68 : \par Examples
69 :
70 : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
71 : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
72 :
73 : \verbatim
74 : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
75 : 0 2.9993805e+01 6.54628 10.37820 -0.92988 2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02 1
76 : 1 2.3468312e+01 6.56095 10.34790 -0.87808 1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02 1
77 : ...
78 : \endverbatim
79 :
80 : To accelerate the computation of the Bayesian score, one can:
81 : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
82 : - calculate the restraint every other step (or more).
83 :
84 : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
85 : using a GROMACS index file.
86 :
87 : The input file looks as follows:
88 :
89 : \plumedfile
90 : # include pdb info
91 : MOLINFO STRUCTURE=prot.pdb
92 :
93 : # all heavy atoms
94 : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
95 :
96 : # create EMMI score
97 : gmm: EMMI NOPBC SIGMA_MEAN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
98 :
99 : # translate into bias - apply every 2 steps
100 : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
101 :
102 : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
103 : \endplumedfile
104 :
105 :
106 : */
107 : //+ENDPLUMEDOC
108 :
109 12 : class EMMI : public Colvar {
110 :
111 : private:
112 :
113 : // temperature in kbt
114 : double kbt_;
115 : // model GMM - weights and atom types
116 : vector<double> GMM_m_w_;
117 : vector<unsigned> GMM_m_type_;
118 : // data GMM - means, weights, and covariances + beta option
119 : vector<Vector> GMM_d_m_;
120 : vector<double> GMM_d_w_;
121 : vector< VectorGeneric<6> > GMM_d_cov_;
122 : vector<int> GMM_d_beta_;
123 : // overlaps
124 : vector<double> ovmd_;
125 : vector<double> ovdd_;
126 : vector<double> ovmd_ave_;
127 : double ov_cut_;
128 : vector<double> ovdd_cut_;
129 : // and derivatives
130 : vector<Vector> ovmd_der_;
131 : vector<Vector> atom_der_;
132 : vector<Vector> atom_der_b_;
133 : vector<double> err_f_;
134 : // constant quantities;
135 : double cfact_;
136 : double inv_sqrt2_, sqrt2_pi_;
137 : // metainference
138 : unsigned nrep_;
139 : unsigned replica_;
140 : vector<double> sigma_mean_;
141 :
142 : // auxiliary stuff
143 : // list of atom sigmas
144 : vector<double> s_map_;
145 : // list of prefactors for overlap between two components of model and data GMM
146 : // fact_md = w_m * w_d / (2pi)**1.5 / sqrt(det_md)
147 : vector< double > fact_md_;
148 : // inverse of the sum of model and data covariances matrices
149 : vector< VectorGeneric<6> > inv_cov_md_;
150 : // neighbor list
151 : double nl_cutoff_;
152 : unsigned nl_stride_;
153 : bool first_time_, no_aver_;
154 : vector < unsigned > nl_;
155 : // parallel stuff
156 : unsigned size_;
157 : unsigned rank_;
158 :
159 : // analysis mode
160 : bool analysis_;
161 : OFile Devfile_;
162 : double nframe_;
163 :
164 : // pbc
165 : bool pbc_;
166 :
167 : // calculate model GMM weights and covariances - these are constants
168 : void get_GMM_m(vector<AtomNumber> &atoms);
169 : // read data GMM file
170 : void get_GMM_d(string gmm_file);
171 : // normalize GMM
172 : void normalize_GMM(vector<double> &w);
173 : // check GMM data
174 : void check_GMM_d(VectorGeneric<6> &cov, double w);
175 :
176 : // get auxiliary stuff
177 : void get_auxiliary_stuff();
178 : // get cutoff in overlap
179 : void get_cutoff_ov();
180 : // get fact_md and inv_cov_md
181 : double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
182 : double &GMM_w_0, double &GMM_w_1,
183 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
184 : // calculate self overlaps between data GMM components - ovdd_
185 : double get_self_overlap(unsigned id);
186 : // calculate overlap between two components
187 : double get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
188 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
189 : double get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
190 : const VectorGeneric<6> &inv_cov_md);
191 : // update the neighbor list
192 : void update_neighbor_list();
193 : // calculate overlap
194 : void calculate_overlap();
195 :
196 : public:
197 : static void registerKeywords( Keywords& keys );
198 : explicit EMMI(const ActionOptions&);
199 : // active methods:
200 : void prepare();
201 : virtual void calculate();
202 : };
203 :
204 6456 : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
205 :
206 5 : void EMMI::registerKeywords( Keywords& keys ) {
207 5 : Colvar::registerKeywords( keys );
208 20 : keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
209 20 : keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
210 20 : keys.add("compulsory","TEMP","temperature");
211 15 : keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
212 15 : keys.addFlag("ANALYSIS",false,"run in analysis mode");
213 20 : keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
214 20 : keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
215 20 : keys.add("compulsory","SIGMA_MEAN","starting value for the uncertainty in the mean estimate");
216 5 : componentsAreNotOptional(keys);
217 20 : keys.addOutputComponent("score", "default","Bayesian score");
218 20 : keys.addOutputComponent("scoreb", "default","Beta Bayesian score");
219 5 : }
220 :
221 4 : EMMI::EMMI(const ActionOptions&ao):
222 : PLUMED_COLVAR_INIT(ao),
223 : inv_sqrt2_(0.707106781186548),
224 : sqrt2_pi_(0.797884560802865),
225 : nl_cutoff_(-1.0), nl_stride_(0),
226 : first_time_(true), no_aver_(false),
227 40 : analysis_(false), nframe_(0.0), pbc_(true)
228 : {
229 :
230 4 : bool nopbc=!pbc_;
231 8 : parseFlag("NOPBC",nopbc);
232 4 : pbc_=!nopbc;
233 :
234 : vector<AtomNumber> atoms;
235 8 : parseAtomList("ATOMS",atoms);
236 :
237 : string GMM_file;
238 8 : parse("GMM_FILE",GMM_file);
239 :
240 : // uncertainty stuff
241 : double sigma_mean;
242 8 : parse("SIGMA_MEAN",sigma_mean);
243 :
244 : // temperature
245 4 : double temp=0.0;
246 8 : parse("TEMP",temp);
247 : // convert temp to kbt
248 8 : if(temp>0.0) kbt_=plumed.getAtoms().getKBoltzmann()*temp;
249 0 : else kbt_=plumed.getAtoms().getKbT();
250 :
251 : // neighbor list stuff
252 8 : parse("NL_CUTOFF",nl_cutoff_);
253 4 : if(nl_cutoff_<=0.0) error("NL_CUTOFF should be explicitly specified and positive");
254 8 : parse("NL_STRIDE",nl_stride_);
255 4 : if(nl_stride_<=0) error("NL_STRIDE should be explicitly specified and positive");
256 :
257 8 : parseFlag("NO_AVER",no_aver_);
258 8 : parseFlag("ANALYSIS",analysis_);
259 :
260 4 : checkRead();
261 :
262 : // set parallel stuff
263 4 : size_=comm.Get_size();
264 4 : rank_=comm.Get_rank();
265 :
266 : // get number of replicas
267 4 : if(rank_==0) {
268 3 : if(no_aver_) {
269 3 : nrep_ = 1;
270 3 : replica_ = 0;
271 : } else {
272 0 : nrep_ = multi_sim_comm.Get_size();
273 0 : replica_ = multi_sim_comm.Get_rank();
274 : }
275 : } else {
276 1 : nrep_ = 0;
277 1 : replica_ = 0;
278 : }
279 4 : comm.Sum(&nrep_,1);
280 4 : comm.Sum(&replica_,1);
281 :
282 4 : log.printf(" atoms involved : ");
283 7232 : for(unsigned i=0; i<atoms.size(); ++i) log.printf("%d ",atoms[i].serial());
284 4 : log.printf("\n");
285 8 : log.printf(" GMM data file : %s\n", GMM_file.c_str());
286 4 : if(no_aver_) log.printf(" without ensemble averaging\n");
287 4 : log.printf(" neighbor list overlap cutoff : %lf\n", nl_cutoff_);
288 4 : log.printf(" neighbor list stride : %u\n", nl_stride_);
289 4 : log.printf(" uncertainty in the mean estimate %f\n",sigma_mean);
290 4 : log.printf(" temperature of the system in energy unit %f\n",kbt_);
291 4 : log.printf(" number of replicas %u\n",nrep_);
292 :
293 :
294 : // set constant quantity before calculating stuff
295 4 : cfact_ = 1.0/pow( 2.0*pi, 1.5 );
296 :
297 : // calculate model GMM constant parameters
298 4 : get_GMM_m(atoms);
299 :
300 : // read data GMM parameters
301 8 : get_GMM_d(GMM_file);
302 8 : log.printf(" number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
303 :
304 : // normalize GMMs
305 4 : normalize_GMM(GMM_m_w_);
306 4 : normalize_GMM(GMM_d_w_);
307 :
308 : // get self overlaps between data GMM components
309 4712 : for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
310 1568 : double ov = get_self_overlap(i);
311 1568 : ovdd_.push_back(ov);
312 3136 : sigma_mean_.push_back(sigma_mean*ov);
313 : }
314 :
315 : // calculate auxiliary stuff
316 4 : get_auxiliary_stuff();
317 :
318 : // get cutoff for overlap calculation - avoid millions of exp calculations
319 4 : get_cutoff_ov();
320 :
321 : // and prepare temporary vectors
322 4 : ovmd_.resize(GMM_d_w_.size());
323 4 : err_f_.resize(GMM_d_w_.size());
324 4 : atom_der_.resize(GMM_m_w_.size());
325 4 : atom_der_b_.resize(GMM_m_w_.size());
326 :
327 : // clear things that are not needed anymore
328 : GMM_d_cov_.clear();
329 :
330 : // add components
331 12 : addComponentWithDerivatives("score"); componentIsNotPeriodic("score");
332 12 : addComponentWithDerivatives("scoreb"); componentIsNotPeriodic("scoreb");
333 :
334 : // request the atoms
335 4 : requestAtoms(atoms);
336 :
337 12 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
338 12 : log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
339 12 : log<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
340 4 : log<<"\n";
341 4 : }
342 :
343 4 : void EMMI::get_GMM_m(vector<AtomNumber> &atoms)
344 : {
345 8 : vector<SetupMolInfo*> moldat=plumed.getActionSet().select<SetupMolInfo*>();
346 :
347 : // map of atom types to A and B coefficients of scattering factor
348 : // f(s) = A * exp(-B*s**2)
349 : // B is in Angstrom squared
350 : map<string, double> w_map;
351 8 : w_map["C"] = 2.49982; // type 0
352 8 : w_map["O"] = 1.97692; // type 1
353 8 : w_map["N"] = 2.20402; // type 2
354 8 : w_map["S"] = 5.14099; // type 3
355 : // map between an atom type and an index
356 : map<string, unsigned> type_map;
357 8 : type_map["C"]=0;
358 8 : type_map["O"]=1;
359 8 : type_map["N"]=2;
360 8 : type_map["S"]=3;
361 : // fill in the sigma vector
362 8 : s_map_.push_back(15.146);
363 8 : s_map_.push_back(8.59722);
364 8 : s_map_.push_back(11.1116);
365 8 : s_map_.push_back(15.8952);
366 :
367 : // check if MOLINFO line is present
368 4 : if( moldat.size()==1 ) {
369 4 : log<<" MOLINFO DATA found, using proper atom names\n";
370 7232 : for(unsigned i=0; i<atoms.size(); ++i) {
371 : // get atom name
372 4816 : string name = moldat[0]->getAtomName(atoms[i]);
373 : char type;
374 : // get atom type
375 2408 : char first = name.at(0);
376 : // GOLDEN RULE: type is first letter, if not a number
377 2408 : if (!isdigit(first)) {
378 : type = first;
379 : // otherwise is the second
380 : } else {
381 0 : type = name.at(1);
382 : }
383 : // check if key in map
384 2408 : std::string type_s = std::string(1,type);
385 2408 : if(w_map.find(type_s) != w_map.end()) {
386 : // save atom type
387 2408 : GMM_m_type_.push_back(type_map[type_s]);
388 : // this will be normalized to 1 in the final density
389 2408 : GMM_m_w_.push_back(w_map[type_s]);
390 : } else {
391 0 : error("Wrong atom type "+type_s+" from atom name "+name+"\n");
392 : }
393 : }
394 : } else {
395 0 : error("MOLINFO DATA not found\n");
396 : }
397 4 : }
398 :
399 :
400 1568 : void EMMI::check_GMM_d(VectorGeneric<6> &cov, double w)
401 : {
402 :
403 : // check if positive defined, by calculating the 3 leading principal minors
404 1568 : double pm1 = cov[0];
405 1568 : double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
406 1568 : double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
407 : // apply Sylvester’s criterion
408 1568 : if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0)
409 0 : error("check data GMM: covariance matrix is not positive defined");
410 :
411 : // check if weight is positive
412 1568 : if(w<0.0) error("check data GMM: weight must be positive");
413 1568 : }
414 :
415 :
416 : // read GMM data file in PLUMED format:
417 4 : void EMMI::get_GMM_d(string GMM_file)
418 : {
419 4 : VectorGeneric<6> cov;
420 :
421 : // open file
422 4 : IFile *ifile = new IFile();
423 4 : if(ifile->FileExist(GMM_file)) {
424 4 : ifile->open(GMM_file);
425 : int idcomp;
426 4712 : while(ifile->scanField("Id",idcomp)) {
427 : int beta;
428 : double w, m0, m1, m2;
429 3136 : ifile->scanField("Weight",w);
430 3136 : ifile->scanField("Mean_0",m0);
431 3136 : ifile->scanField("Mean_1",m1);
432 3136 : ifile->scanField("Mean_2",m2);
433 3136 : ifile->scanField("Cov_00",cov[0]);
434 3136 : ifile->scanField("Cov_01",cov[1]);
435 3136 : ifile->scanField("Cov_02",cov[2]);
436 3136 : ifile->scanField("Cov_11",cov[3]);
437 3136 : ifile->scanField("Cov_12",cov[4]);
438 3136 : ifile->scanField("Cov_22",cov[5]);
439 3136 : ifile->scanField("Beta",beta);
440 : // check input
441 1568 : check_GMM_d(cov, w);
442 : // center of the Gaussian
443 3136 : GMM_d_m_.push_back(Vector(m0,m1,m2));
444 : // covariance matrix
445 1568 : GMM_d_cov_.push_back(cov);
446 : // weights
447 1568 : GMM_d_w_.push_back(w);
448 : // beta
449 1568 : GMM_d_beta_.push_back(beta);
450 : // new line
451 1568 : ifile->scanField();
452 : }
453 4 : ifile->close();
454 : } else {
455 0 : error("Cannot find GMM_FILE "+GMM_file+"\n");
456 : }
457 4 : delete ifile;
458 :
459 4 : }
460 :
461 : // normalize GMM to sum to 1
462 : // since all the GMM components are individually normalized, we just need to
463 : // divide each weight for the sum of the weights
464 8 : void EMMI::normalize_GMM(vector<double> &w)
465 : {
466 : double norm = accumulate(w.begin(), w.end(), 0.0);
467 11944 : for(unsigned i=0; i<w.size(); ++i) w[i] /= norm;
468 8 : }
469 :
470 4 : void EMMI::get_auxiliary_stuff()
471 : {
472 4 : VectorGeneric<6> cov, sum, inv_sum;
473 : // cycle on all atoms types
474 36 : for(unsigned i=0; i<4; ++i) {
475 : // the Gaussian in density (real) space is the FT of scattering factor
476 : // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
477 32 : double s = sqrt ( 0.5 * s_map_[i] ) / pi * 0.1;
478 : // covariance matrix for spherical Gaussian
479 16 : cov[0]=s*s; cov[1]=0.0; cov[2]=0.0;
480 16 : cov[3]=s*s; cov[4]=0.0;
481 16 : cov[5]=s*s;
482 : // cycle on all data GMM
483 18848 : for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
484 : // we need the sum of the covariance matrices
485 81536 : for(unsigned k=0; k<6; ++k) sum[k] = cov[k] + GMM_d_cov_[j][k];
486 : // and to calculate its determinant
487 6272 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
488 6272 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
489 6272 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
490 : // the constant part of the prefactor is
491 6272 : double pre_fact = cfact_ / sqrt(det);
492 : // and its inverse
493 6272 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
494 6272 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
495 6272 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
496 6272 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
497 6272 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
498 6272 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
499 : // now we store the pre_fact
500 6272 : fact_md_.push_back(pre_fact);
501 : // and the inverse of the sum
502 6272 : inv_cov_md_.push_back(inv_sum);
503 : }
504 : }
505 :
506 4 : }
507 :
508 : // get prefactors
509 614656 : double EMMI::get_prefactor_inverse
510 : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
511 : double &GMM_w_0, double &GMM_w_1,
512 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum)
513 : {
514 : // we need the sum of the covariance matrices
515 4302592 : for(unsigned k=0; k<6; ++k) sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
516 :
517 : // and to calculate its determinant
518 614656 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
519 614656 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
520 614656 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
521 :
522 : // the prefactor is
523 614656 : double pre_fact = cfact_ / sqrt(det) * GMM_w_0 * GMM_w_1;
524 :
525 : // and its inverse
526 614656 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
527 614656 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
528 614656 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
529 614656 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
530 614656 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
531 614656 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
532 :
533 : // return pre-factor
534 614656 : return pre_fact;
535 : }
536 :
537 1568 : double EMMI::get_self_overlap(unsigned id)
538 : {
539 : vector<double> ov;
540 1568 : VectorGeneric<6> sum, inv_sum;
541 1568 : Vector ov_der;
542 : // start loop
543 1847104 : for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
544 : // call auxiliary method
545 614656 : double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
546 1229312 : GMM_d_w_[id], GMM_d_w_[i], sum, inv_sum);
547 : // calculate overlap
548 614656 : double ov_tmp = get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
549 : // add to list
550 614656 : ov.push_back(ov_tmp);
551 : }
552 : // calculate total
553 : double ov_tot = accumulate(ov.begin(), ov.end(), 0.0);
554 : // sort in ascending order
555 : std::sort(ov.begin(), ov.end());
556 : // get cutoff = nl_cutoff_ * ov_tot
557 1568 : double ov_cut = ov_tot * nl_cutoff_;
558 : // integrate tail of ov
559 : double ov_sum = 0.0;
560 1769872 : for(unsigned i=1; i<ov.size(); ++i) {
561 590480 : ov_sum += ov[i];
562 590480 : if(ov_sum >= ov_cut) {
563 3136 : ov_cut = ov[i-1];
564 1568 : break;
565 : }
566 : }
567 : // store
568 1568 : ovdd_cut_.push_back(ov_cut);
569 : // and return it
570 1568 : return ov_tot;
571 : }
572 :
573 : // this is to avoid the calculation of millions of exp function
574 : // when updating the neighbor list using calculate_overlap
575 4 : void EMMI::get_cutoff_ov()
576 : {
577 : // temporary stuff
578 4 : unsigned GMM_d_w_size = GMM_d_w_.size();
579 : // set ov_cut_ to a huge number
580 4 : ov_cut_ = 1.0+9;
581 : // calculate minimum value needed for cutoff
582 3140 : for(unsigned i=0; i<GMM_d_w_.size(); ++i) {
583 2834944 : for(unsigned j=0; j<GMM_m_w_.size(); ++j) {
584 : // get atom type
585 943936 : unsigned jtype = GMM_m_type_[j];
586 : // get index in auxiliary lists
587 943936 : unsigned kaux = jtype * GMM_d_w_size + i;
588 : // get prefactor and multiply by weights
589 3775744 : double pre_fact = fact_md_[kaux] * GMM_d_w_[i] * GMM_m_w_[j];
590 : // calculate ov
591 943936 : double ov = ovdd_cut_[i] / pre_fact;
592 : // check
593 943936 : if(ov < ov_cut_) ov_cut_ = ov;
594 : }
595 : }
596 : // set cutoff
597 4 : ov_cut_ = -2.0 * std::log(ov_cut_);
598 4 : }
599 :
600 : // version with derivatives
601 10788184 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
602 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der)
603 : {
604 10788184 : Vector md;
605 : // calculate vector difference m_m-d_m with/without pbc
606 10788184 : if(pbc_) md = pbcDistance(d_m, m_m);
607 10788184 : else md = delta(d_m, m_m);
608 : // calculate product of transpose of md and inv_cov_md
609 10788184 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
610 10788184 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
611 10788184 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
612 : // calculate product of prod and md
613 10788184 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
614 : // final calculation
615 10788184 : ov = fact_md * exp(-0.5*ov);
616 : // derivatives
617 10788184 : ov_der = ov * Vector(p_x, p_y, p_z);
618 10788184 : return ov;
619 : }
620 :
621 : // fast version without derivatives and cutoff used for neighbor list
622 429018912 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double &fact_md,
623 : const VectorGeneric<6> &inv_cov_md)
624 :
625 : {
626 429018912 : Vector md;
627 : // calculate vector difference m_m-d_m with/without pbc
628 429018912 : if(pbc_) md = pbcDistance(d_m, m_m);
629 429018912 : else md = delta(d_m, m_m);
630 : // calculate product of transpose of md and inv_cov_md
631 429018912 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
632 429018912 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
633 429018912 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
634 : // calculate product of prod and md
635 429018912 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
636 : // final calculation
637 429018912 : if( ov > ov_cut_ ) {
638 : ov = 0.0;
639 : } else {
640 24333930 : ov = fact_md * exp(-0.5*ov);
641 : }
642 429018912 : return ov;
643 : }
644 :
645 1819 : void EMMI::update_neighbor_list()
646 : {
647 : // temp stuff
648 1819 : unsigned GMM_d_w_size = GMM_d_w_.size();
649 1819 : unsigned GMM_m_w_size = GMM_m_w_.size();
650 : // local neighbor list
651 : vector < unsigned > nl_l;
652 : // clear old neighbor list
653 1819 : nl_.clear();
654 : // cycle on all overlaps (in parallel)
655 1819 : unsigned nover = GMM_d_w_size * GMM_m_w_size;
656 429020731 : for(unsigned k=rank_; k<nover; k=k+size_) {
657 : // get indexes
658 429018912 : unsigned i = k / GMM_m_w_size;
659 429018912 : unsigned j = k % GMM_m_w_size;
660 : // get atom type
661 858037824 : unsigned jtype = GMM_m_type_[j];
662 : // get index in auxiliary lists
663 429018912 : unsigned kaux = jtype * GMM_d_w_size + i;
664 : // get prefactor and multiply by weights
665 1716075648 : double pre_fact = fact_md_[kaux] * GMM_d_w_[i] * GMM_m_w_[j];
666 : // calculate overlap
667 858037824 : double ov = get_overlap(GMM_d_m_[i], getPosition(j), pre_fact, inv_cov_md_[kaux]);
668 : // fill the neighbor list
669 429018912 : if(ov >= ovdd_cut_[i]) nl_l.push_back(k);
670 : }
671 : // find total dimension of neighborlist
672 1819 : vector <int> recvcounts(size_, 0);
673 3638 : recvcounts[rank_] = nl_l.size();
674 3638 : comm.Sum(&recvcounts[0], size_);
675 : int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
676 : // resize neighbor stuff
677 1819 : nl_.resize(tot_size);
678 : // calculate vector of displacement
679 1819 : vector<int> disp(size_);
680 1819 : disp[0] = 0;
681 : int rank_size = 0;
682 1823 : for(unsigned i=0; i<size_-1; ++i) {
683 4 : rank_size += recvcounts[i];
684 4 : disp[i+1] = rank_size;
685 : }
686 : // Allgather neighbor list
687 7276 : comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
688 : // now resize derivatives
689 1819 : ovmd_der_.resize(tot_size);
690 1819 : }
691 :
692 4 : void EMMI::prepare()
693 : {
694 4 : if(getExchangeStep()) first_time_=true;
695 4 : }
696 :
697 :
698 : // overlap calculator
699 1819 : void EMMI::calculate_overlap() {
700 :
701 1819 : if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
702 1819 : update_neighbor_list();
703 1819 : first_time_=false;
704 : }
705 :
706 : // clean temporary vectors
707 2142782 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] = 0.0;
708 30541010 : for(unsigned i=0; i<ovmd_der_.size(); ++i) ovmd_der_[i] = Vector(0,0,0);
709 :
710 : // we have to cycle over all model and data GMM components in the neighbor list
711 1819 : unsigned GMM_d_w_size = GMM_d_w_.size();
712 1819 : unsigned GMM_m_w_size = GMM_m_w_.size();
713 20350694 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
714 : // get indexes of data and model component
715 10173528 : unsigned id = nl_[i] / GMM_m_w_size;
716 10173528 : unsigned im = nl_[i] % GMM_m_w_size;
717 : // get atom type
718 20347056 : unsigned jtype = GMM_m_type_[im];
719 : // get index in auxiliary lists
720 10173528 : unsigned kaux = jtype * GMM_d_w_size + id;
721 : // get prefactor and multiply by weights
722 40694112 : double pre_fact = fact_md_[kaux] * GMM_d_w_[id] * GMM_m_w_[im];
723 : // add overlap with im component of model GMM
724 30520584 : ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact,
725 : inv_cov_md_[kaux], ovmd_der_[i]);
726 : }
727 : // communicate stuff
728 3638 : comm.Sum(&ovmd_[0], ovmd_.size());
729 3638 : comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
730 1819 : }
731 :
732 :
733 1819 : void EMMI::calculate() {
734 :
735 : // calculate CV
736 1819 : calculate_overlap();
737 :
738 1819 : if(!analysis_) {
739 :
740 : // BIASING MODE
741 : // rescale factor for ensemble average
742 1819 : double escale = 1.0 / static_cast<double>(nrep_);
743 :
744 : // calculate average of ovmd_ across replicas
745 1819 : if(!no_aver_) {
746 0 : if(comm.Get_rank()==0) {
747 0 : multi_sim_comm.Sum(&ovmd_[0], ovmd_.size());
748 0 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] *= escale;
749 : } else {
750 0 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] = 0.0;
751 : }
752 0 : comm.Sum(&ovmd_[0], ovmd_.size());
753 : }
754 :
755 : // calculate score and scoreb
756 : double ene = 0.0;
757 : double ene_b = 0.0;
758 2142782 : for(unsigned i=0; i<ovmd_.size(); ++i) {
759 : // calculate and store err function
760 2852192 : err_f_[i] = erf ( ( ovmd_[i]-ovdd_[i] ) * inv_sqrt2_ / sigma_mean_[i] );
761 : // energy term
762 2852192 : double ene_tmp = -kbt_ * std::log ( 0.5 / (ovmd_[i]-ovdd_[i]) * err_f_[i]) ;
763 : // increment energy
764 713048 : if(GMM_d_beta_[i] == 1) ene_b += ene_tmp;
765 0 : else ene += ene_tmp;
766 : }
767 :
768 : // multiply by number of replicas
769 1819 : ene /= escale;
770 1819 : ene_b /= escale;
771 :
772 : // clear temporary vector
773 3288752 : for(unsigned i=0; i<atom_der_.size(); ++i) {
774 1095038 : atom_der_[i] = Vector(0,0,0);
775 1095038 : atom_der_b_[i] = Vector(0,0,0);
776 : }
777 : // virial
778 1819 : Tensor virial, virialb;
779 :
780 : // get derivatives of bias with respect to atoms
781 20350694 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
782 : // get indexes of data and model component
783 20347056 : unsigned id = nl_[i] / GMM_m_w_.size();
784 10173528 : unsigned im = nl_[i] % GMM_m_w_.size();
785 : // first part of derivative
786 61041168 : double der = - kbt_/err_f_[id]*sqrt2_pi_*exp(-0.5*(ovmd_[id]-ovdd_[id])*(ovmd_[id]-ovdd_[id])/sigma_mean_[id]/sigma_mean_[id])/sigma_mean_[id];
787 : // second part
788 30520584 : der += kbt_ / (ovmd_[id]-ovdd_[id]);
789 : // chain rule
790 10173528 : Vector tot_der = der * ovmd_der_[i];
791 : // atom's position in GMM cell
792 10173528 : Vector pos;
793 10173528 : if(pbc_) pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
794 20347056 : else pos = getPosition(im);
795 : // add derivative and virial
796 10173528 : if(GMM_d_beta_[id] == 1) {
797 10173528 : atom_der_b_[im] += tot_der;
798 10173528 : virialb += Tensor(pos, -tot_der);
799 : } else {
800 0 : atom_der_[im] += tot_der;
801 0 : virial += Tensor(pos, -tot_der);
802 : }
803 : }
804 :
805 : // communicate stuff
806 3638 : comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
807 3638 : comm.Sum(&atom_der_b_[0][0], 3*atom_der_b_.size());
808 1819 : comm.Sum(virial);
809 1819 : comm.Sum(virialb);
810 :
811 : // set derivatives
812 3288752 : for(unsigned i=0; i<atom_der_.size(); ++i) {
813 3285114 : setAtomsDerivatives(getPntrToComponent("score"), i, atom_der_[i]);
814 2190076 : setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_b_[i]);
815 : }
816 :
817 : // and set virial
818 3638 : setBoxDerivatives(getPntrToComponent("score"), virial);
819 3638 : setBoxDerivatives(getPntrToComponent("scoreb"), virialb);
820 :
821 : // set value of the score
822 3638 : getPntrToComponent("score")->set(ene);
823 : // set value of the beta score
824 3638 : getPntrToComponent("scoreb")->set(ene_b);
825 :
826 : } else {
827 :
828 : // ANALYSIS MODE
829 : // prepare stuff for the first time
830 0 : if(nframe_ <= 0.0) {
831 0 : Devfile_.link(*this);
832 0 : Devfile_.open("ovmd_deviations.dat");
833 : Devfile_.setHeavyFlush();
834 0 : Devfile_.fmtField("%12.6f");
835 0 : ovmd_ave_.resize(GMM_d_w_.size());
836 0 : for(unsigned i=0; i<ovmd_ave_.size(); ++i) ovmd_ave_[i] = 0.0;
837 : }
838 :
839 : // increment number of frames
840 0 : nframe_ += 1.0;
841 :
842 : // add average ovmd_
843 0 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] += ovmd_[i];
844 :
845 : // print stuff
846 0 : for(unsigned i=0; i<ovmd_.size(); ++i) {
847 : // convert i to string
848 0 : stringstream ss;
849 : ss << i;
850 : // labels
851 0 : string label = "ovmd_" + ss.str();
852 : // print entry
853 0 : double ave = ovmd_ave_[i] / nframe_;
854 0 : double dev2 = (ave-ovdd_[i])*(ave-ovdd_[i])/ovdd_[i]/ovdd_[i];
855 0 : double dev = sqrt(dev2);
856 0 : Devfile_.printField(label, dev);
857 : }
858 0 : Devfile_.printField();
859 : }
860 :
861 1819 : }
862 :
863 : }
864 4839 : }
|