Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2017-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionWithValue.h"
23 : #include "core/ActionAtomistic.h"
24 : #include "core/ActionWithArguments.h"
25 : #include "core/ActionRegister.h"
26 : #include "core/PlumedMain.h"
27 : #include "tools/Matrix.h"
28 : #include "tools/Communicator.h"
29 : #include "core/GenericMolInfo.h"
30 : #include "core/ActionSet.h"
31 : #include "tools/File.h"
32 : #include "tools/Random.h"
33 :
34 : #include <string>
35 : #include <map>
36 : #include <numeric>
37 : #include <ctime>
38 :
39 : namespace PLMD {
40 : namespace isdb {
41 :
42 : //+PLUMEDOC ISDB_COLVAR EMMI
43 : /*
44 : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
45 :
46 : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in Ref. \cite Hanot113951 .
47 : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
48 :
49 : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
50 : GMM_FILE. We are currently working on a web server to perform
51 : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
52 :
53 : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
54 : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using
55 : the Metainference approach \cite Bonomi:2016ip .
56 :
57 : \warning
58 : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
59 :
60 : \note
61 : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
62 : In this case, the user should add the NO_AVER flag to the input line. To use a replica-based enhanced sampling scheme such as
63 : Parallel-Bias Metadynamics (\ref PBMETAD), one should use the REWEIGHT flag and pass the Metadynamics bias using the ARG keyword.
64 :
65 : \note
66 : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
67 : add the NOPBC flag to the input line
68 :
69 : \par Examples
70 :
71 : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
72 : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
73 :
74 : \plumedfile
75 : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
76 : 0 2.9993805e+01 6.54628 10.37820 -0.92988 2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02 1
77 : 1 2.3468312e+01 6.56095 10.34790 -0.87808 1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02 1
78 : ...
79 : \endplumedfile
80 :
81 : To accelerate the computation of the Bayesian score, one can:
82 : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
83 : - calculate the restraint every other step (or more).
84 :
85 : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
86 : using a GROMACS index file.
87 :
88 : The input file looks as follows:
89 :
90 : \plumedfile
91 : # include pdb info
92 : MOLINFO STRUCTURE=prot.pdb
93 :
94 : # all heavy atoms
95 : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
96 :
97 : # create EMMI score
98 : gmm: EMMI NOPBC SIGMA_MIN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
99 :
100 : # translate into bias - apply every 2 steps
101 : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
102 :
103 : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
104 : \endplumedfile
105 :
106 :
107 : */
108 : //+ENDPLUMEDOC
109 :
110 : class EMMI :
111 : public ActionAtomistic,
112 : public ActionWithArguments,
113 : public ActionWithValue {
114 : private:
115 :
116 : // temperature in kbt
117 : double kbt_;
118 : // model GMM - atom types
119 : std::vector<unsigned> GMM_m_type_;
120 : // model GMM - list of atom sigmas - one per atom type
121 : std::vector<double> GMM_m_s_;
122 : // model GMM - list of atom weights - one per atom type
123 : std::vector<double> GMM_m_w_;
124 : // data GMM - means, weights, and covariances + beta option
125 : std::vector<Vector> GMM_d_m_;
126 : std::vector<double> GMM_d_w_;
127 : std::vector< VectorGeneric<6> > GMM_d_cov_;
128 : std::vector<int> GMM_d_beta_;
129 : std::vector < std::vector<int> > GMM_d_grps_;
130 : // overlaps
131 : std::vector<double> ovmd_;
132 : std::vector<double> ovdd_;
133 : std::vector<double> ovmd_ave_;
134 : // and derivatives
135 : std::vector<Vector> ovmd_der_;
136 : std::vector<Vector> atom_der_;
137 : std::vector<double> GMMid_der_;
138 : // constants
139 : double cfact_;
140 : double inv_sqrt2_, sqrt2_pi_;
141 : // metainference
142 : unsigned nrep_;
143 : unsigned replica_;
144 : std::vector<double> sigma_;
145 : std::vector<double> sigma_min_;
146 : std::vector<double> sigma_max_;
147 : std::vector<double> dsigma_;
148 : // list of prefactors for overlap between two Gaussians
149 : // pre_fact = 1.0 / (2pi)**1.5 / std::sqrt(det_md) * Wm * Wd
150 : std::vector<double> pre_fact_;
151 : // inverse of the sum of model and data covariances matrices
152 : std::vector< VectorGeneric<6> > inv_cov_md_;
153 : // neighbor list
154 : double nl_cutoff_;
155 : unsigned nl_stride_;
156 : bool first_time_;
157 : bool no_aver_;
158 : std::vector<unsigned> nl_;
159 : // parallel stuff
160 : unsigned size_;
161 : unsigned rank_;
162 : // pbc
163 : bool pbc_;
164 : // Monte Carlo stuff
165 : int MCstride_;
166 : double MCaccept_;
167 : double MCtrials_;
168 : Random random_;
169 : // status stuff
170 : unsigned int statusstride_;
171 : std::string statusfilename_;
172 : OFile statusfile_;
173 : bool first_status_;
174 : // regression
175 : unsigned nregres_;
176 : double scale_;
177 : double scale_min_;
178 : double scale_max_;
179 : double dscale_;
180 : // tabulated exponential
181 : double dpcutoff_;
182 : double dexp_;
183 : unsigned nexp_;
184 : std::vector<double> tab_exp_;
185 : // simulated annealing
186 : unsigned nanneal_;
187 : double kanneal_;
188 : double anneal_;
189 : // prior exponent
190 : double prior_;
191 : // noise type
192 : unsigned noise_;
193 : // total score and virial;
194 : double ene_;
195 : Tensor virial_;
196 : // model overlap file
197 : unsigned int ovstride_;
198 : std::string ovfilename_;
199 :
200 : // Reweighting additions
201 : bool do_reweight_;
202 : bool first_time_w_;
203 : std::vector<double> forces;
204 : std::vector<double> forcesToApply;
205 :
206 : // average weights
207 : double decay_w_;
208 : std::vector<double> average_weights_;
209 :
210 : // write file with model overlap
211 : void write_model_overlap(long long int step);
212 : // get median of std::vector
213 : double get_median(std::vector<double> &v);
214 : // annealing
215 : double get_annealing(long long int step);
216 : // do regression
217 : double scaleEnergy(double s);
218 : double doRegression();
219 : // read and write status
220 : void read_status();
221 : void print_status(long long int step);
222 : // accept or reject
223 : bool doAccept(double oldE, double newE, double kbt);
224 : // do MonteCarlo
225 : void doMonteCarlo();
226 : // read error file
227 : std::vector<double> read_exp_errors(const std::string & errfile);
228 : // read experimental overlaps
229 : std::vector<double> read_exp_overlaps(const std::string & ovfile);
230 : // calculate model GMM weights and covariances
231 : std::vector<double> get_GMM_m(std::vector<AtomNumber> &atoms);
232 : // read data GMM file
233 : void get_GMM_d(const std::string & gmm_file);
234 : // check GMM data
235 : void check_GMM_d(const VectorGeneric<6> &cov, double w);
236 : // auxiliary method
237 : void calculate_useful_stuff(double reso);
238 : // get pref_fact and inv_cov_md
239 : double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
240 : double GMM_w_0, double GMM_w_1,
241 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
242 : // calculate self overlaps between data GMM components - ovdd_
243 : double get_self_overlap(unsigned id);
244 : // calculate overlap between two Gaussians
245 : double get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
246 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
247 : // calculate exponent of overlap for neighbor list update
248 : double get_exp_overlap(const Vector &m_m, const Vector &d_m,
249 : const VectorGeneric<6> &inv_cov_md);
250 : // update the neighbor list
251 : void update_neighbor_list();
252 : // calculate overlap
253 : void calculate_overlap();
254 : // Gaussian noise
255 : void calculate_Gauss();
256 : // Outliers noise
257 : void calculate_Outliers();
258 : // Marginal noise
259 : void calculate_Marginal();
260 :
261 : // See MetainferenceBase
262 : void get_weights(double &weight, double &norm, double &neff);
263 :
264 : public:
265 : static void registerKeywords( Keywords& keys );
266 : explicit EMMI(const ActionOptions&);
267 : // needed for reweighting
268 : void setDerivatives();
269 : void turnOnDerivatives() override;
270 : unsigned getNumberOfDerivatives() override;
271 : void lockRequests() override;
272 : void unlockRequests() override;
273 : void calculateNumericalDerivatives( ActionWithValue* a ) override;
274 : void apply() override;
275 : void setArgDerivatives(Value *v, const double &d);
276 : void setAtomsDerivatives(Value*v, const unsigned i, const Vector&d);
277 : void setBoxDerivatives(Value*v, const Tensor&d);
278 : // active methods:
279 : void prepare() override;
280 : void calculate() override;
281 : };
282 :
283 : inline
284 30 : void EMMI::setDerivatives() {
285 : // Get appropriate number of derivatives
286 : // Derivatives are first for arguments and then for atoms
287 : unsigned nder;
288 30 : if( getNumberOfAtoms()>0 ) {
289 30 : nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
290 : } else {
291 0 : nder = getNumberOfArguments();
292 : }
293 :
294 : // Resize all derivative arrays
295 30 : forces.resize( nder );
296 30 : forcesToApply.resize( nder );
297 146 : for(int i=0; i<getNumberOfComponents(); ++i) {
298 116 : getPntrToComponent(i)->resizeDerivatives(nder);
299 : }
300 30 : }
301 :
302 : inline
303 22 : void EMMI::turnOnDerivatives() {
304 22 : ActionWithValue::turnOnDerivatives();
305 22 : }
306 :
307 : inline
308 80 : unsigned EMMI::getNumberOfDerivatives() {
309 80 : if( getNumberOfAtoms()>0 ) {
310 80 : return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
311 : }
312 0 : return getNumberOfArguments();
313 : }
314 :
315 : inline
316 70 : void EMMI::lockRequests() {
317 : ActionAtomistic::lockRequests();
318 : ActionWithArguments::lockRequests();
319 70 : }
320 :
321 : inline
322 70 : void EMMI::unlockRequests() {
323 : ActionAtomistic::unlockRequests();
324 : ActionWithArguments::unlockRequests();
325 70 : }
326 :
327 : inline
328 10 : void EMMI::calculateNumericalDerivatives( ActionWithValue* a=NULL ) {
329 10 : if( getNumberOfArguments()>0 ) {
330 0 : ActionWithArguments::calculateNumericalDerivatives( a );
331 : }
332 10 : if( getNumberOfAtoms()>0 ) {
333 10 : Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
334 44 : for(int j=0; j<getNumberOfComponents(); ++j) {
335 34 : for(unsigned i=0; i<getNumberOfArguments(); ++i)
336 0 : if(getPntrToComponent(j)->hasDerivatives()) {
337 0 : save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
338 : }
339 : }
340 10 : calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
341 44 : for(int j=0; j<getNumberOfComponents(); ++j) {
342 34 : for(unsigned i=0; i<getNumberOfArguments(); ++i)
343 0 : if(getPntrToComponent(j)->hasDerivatives()) {
344 0 : getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
345 : }
346 : }
347 : }
348 10 : }
349 :
350 : inline
351 70 : void EMMI::apply() {
352 : bool wasforced=false;
353 70 : forcesToApply.assign(forcesToApply.size(),0.0);
354 382 : for(int i=0; i<getNumberOfComponents(); ++i) {
355 312 : if( getPntrToComponent(i)->applyForce( forces ) ) {
356 : wasforced=true;
357 0 : for(unsigned i=0; i<forces.size(); ++i) {
358 0 : forcesToApply[i]+=forces[i];
359 : }
360 : }
361 : }
362 70 : if( wasforced ) {
363 0 : unsigned ind=0;
364 0 : addForcesOnArguments( 0, forcesToApply, ind, getLabel() );
365 0 : if( getNumberOfAtoms()>0 ) {
366 0 : setForcesOnAtoms( forcesToApply, ind );
367 : }
368 : }
369 70 : }
370 :
371 : inline
372 : void EMMI::setArgDerivatives(Value *v, const double &d) {
373 : v->addDerivative(0,d);
374 : }
375 :
376 : inline
377 10961336 : void EMMI::setAtomsDerivatives(Value*v, const unsigned i, const Vector&d) {
378 10961336 : const unsigned noa=getNumberOfArguments();
379 10961336 : v->addDerivative(noa+3*i+0,d[0]);
380 10961336 : v->addDerivative(noa+3*i+1,d[1]);
381 10961336 : v->addDerivative(noa+3*i+2,d[2]);
382 10961336 : }
383 :
384 : inline
385 18220 : void EMMI::setBoxDerivatives(Value* v,const Tensor&d) {
386 18220 : const unsigned noa=getNumberOfArguments();
387 : const unsigned nat=getNumberOfAtoms();
388 18220 : v->addDerivative(noa+3*nat+0,d(0,0));
389 18220 : v->addDerivative(noa+3*nat+1,d(0,1));
390 18220 : v->addDerivative(noa+3*nat+2,d(0,2));
391 18220 : v->addDerivative(noa+3*nat+3,d(1,0));
392 18220 : v->addDerivative(noa+3*nat+4,d(1,1));
393 18220 : v->addDerivative(noa+3*nat+5,d(1,2));
394 18220 : v->addDerivative(noa+3*nat+6,d(2,0));
395 18220 : v->addDerivative(noa+3*nat+7,d(2,1));
396 18220 : v->addDerivative(noa+3*nat+8,d(2,2));
397 18220 : }
398 :
399 : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
400 :
401 32 : void EMMI::registerKeywords( Keywords& keys ) {
402 32 : Action::registerKeywords( keys );
403 32 : ActionAtomistic::registerKeywords( keys );
404 32 : ActionWithValue::registerKeywords( keys );
405 32 : ActionWithArguments::registerKeywords( keys );
406 64 : keys.addInputKeyword("optional","ARG","scalar","the labels of the values from which the function is calculated");
407 32 : keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
408 32 : keys.addFlag("NOPBC",false,"ignore the periodic boundary conditions when calculating distances");
409 32 : keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
410 32 : keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
411 32 : keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
412 32 : keys.add("compulsory","SIGMA_MIN","minimum uncertainty");
413 32 : keys.add("compulsory","RESOLUTION", "Cryo-EM map resolution");
414 32 : keys.add("compulsory","NOISETYPE","functional form of the noise (GAUSS, OUTLIERS, MARGINAL)");
415 32 : keys.add("optional","SIGMA0","initial value of the uncertainty");
416 32 : keys.add("optional","DSIGMA","MC step for uncertainties");
417 32 : keys.add("optional","MC_STRIDE", "Monte Carlo stride");
418 32 : keys.add("optional","ERR_FILE","file with experimental or GMM fit errors");
419 32 : keys.add("optional","OV_FILE","file with experimental overlaps");
420 32 : keys.add("optional","NORM_DENSITY","integral of the experimental density");
421 32 : keys.add("optional","STATUS_FILE","write a file with all the data useful for restart");
422 32 : keys.add("optional","WRITE_STRIDE","write the status to a file every N steps, this can be used for restart");
423 32 : keys.add("optional","REGRESSION","regression stride");
424 32 : keys.add("optional","REG_SCALE_MIN","regression minimum scale");
425 32 : keys.add("optional","REG_SCALE_MAX","regression maximum scale");
426 32 : keys.add("optional","REG_DSCALE","regression maximum scale MC move");
427 32 : keys.add("optional","SCALE","scale factor");
428 32 : keys.add("optional","ANNEAL", "Length of annealing cycle");
429 32 : keys.add("optional","ANNEAL_FACT", "Annealing temperature factor");
430 32 : keys.add("optional","TEMP","temperature");
431 32 : keys.add("optional","PRIOR", "exponent of uncertainty prior");
432 32 : keys.add("optional","WRITE_OV_STRIDE","write model overlaps every N steps");
433 32 : keys.add("optional","WRITE_OV","write a file with model overlaps");
434 32 : keys.add("optional","AVERAGING", "Averaging window for weights");
435 32 : keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
436 32 : keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the ARG as energy");
437 64 : keys.addOutputComponent("scoreb","default","scalar","Bayesian score");
438 64 : keys.addOutputComponent("acc", "NOISETYPE","scalar","MC acceptance for uncertainty");
439 64 : keys.addOutputComponent("scale", "REGRESSION","scalar","scale factor");
440 64 : keys.addOutputComponent("accscale", "REGRESSION","scalar","MC acceptance for scale regression");
441 64 : keys.addOutputComponent("enescale", "REGRESSION","scalar","MC energy for scale regression");
442 64 : keys.addOutputComponent("anneal","ANNEAL","scalar","annealing factor");
443 64 : keys.addOutputComponent("weight", "REWEIGHT","scalar", "weights of the weighted average");
444 64 : keys.addOutputComponent("biasDer", "REWEIGHT","scalar", "derivatives with respect to the bias");
445 64 : keys.addOutputComponent("sigma", "NOISETYPE","scalar", "uncertainty in the forward models and experiment");
446 64 : keys.addOutputComponent("neff", "default","scalar", "effective number of replicas");
447 32 : }
448 :
449 30 : EMMI::EMMI(const ActionOptions&ao):
450 : Action(ao),
451 : ActionAtomistic(ao),
452 : ActionWithArguments(ao),
453 : ActionWithValue(ao),
454 30 : inv_sqrt2_(0.707106781186548),
455 30 : sqrt2_pi_(0.797884560802865),
456 30 : first_time_(true), no_aver_(false), pbc_(true),
457 30 : MCstride_(1), MCaccept_(0.), MCtrials_(0.),
458 60 : statusstride_(0), first_status_(true),
459 30 : nregres_(0), scale_(1.),
460 30 : dpcutoff_(15.0), nexp_(1000000), nanneal_(0),
461 60 : kanneal_(0.), anneal_(1.), prior_(1.), ovstride_(0),
462 30 : do_reweight_(false), first_time_w_(true), decay_w_(1.) {
463 : // periodic boundary conditions
464 30 : bool nopbc=!pbc_;
465 30 : parseFlag("NOPBC",nopbc);
466 30 : pbc_=!nopbc;
467 :
468 : // list of atoms
469 : std::vector<AtomNumber> atoms;
470 60 : parseAtomList("ATOMS", atoms);
471 :
472 : // file with data GMM
473 : std::string GMM_file;
474 60 : parse("GMM_FILE", GMM_file);
475 :
476 : // type of data noise
477 : std::string noise;
478 60 : parse("NOISETYPE",noise);
479 30 : if (noise=="GAUSS") {
480 18 : noise_ = 0;
481 12 : } else if(noise=="OUTLIERS") {
482 6 : noise_ = 1;
483 6 : } else if(noise=="MARGINAL") {
484 6 : noise_ = 2;
485 : } else {
486 0 : error("Unknown noise type!");
487 : }
488 :
489 : // minimum value for error
490 : double sigma_min;
491 30 : parse("SIGMA_MIN", sigma_min);
492 30 : if(sigma_min<0) {
493 0 : error("SIGMA_MIN should be greater or equal to zero");
494 : }
495 :
496 : // the following parameters must be specified with noise type 0 and 1
497 : double sigma_ini, dsigma;
498 30 : if(noise_!=2) {
499 : // initial value of the uncertainty
500 24 : parse("SIGMA0", sigma_ini);
501 24 : if(sigma_ini<=0) {
502 0 : error("you must specify a positive SIGMA0");
503 : }
504 : // MC parameters
505 24 : parse("DSIGMA", dsigma);
506 24 : if(dsigma<0) {
507 0 : error("you must specify a positive DSIGMA");
508 : }
509 24 : parse("MC_STRIDE", MCstride_);
510 24 : if(dsigma>0 && MCstride_<=0) {
511 0 : error("you must specify a positive MC_STRIDE");
512 : }
513 : // status file parameters
514 24 : parse("WRITE_STRIDE", statusstride_);
515 24 : if(statusstride_==0) {
516 0 : error("you must specify a positive WRITE_STRIDE");
517 : }
518 48 : parse("STATUS_FILE", statusfilename_);
519 24 : if(statusfilename_=="") {
520 48 : statusfilename_ = "MISTATUS"+getLabel();
521 : } else {
522 0 : statusfilename_ = statusfilename_+getLabel();
523 : }
524 : }
525 :
526 : // error file
527 : std::string errfile;
528 60 : parse("ERR_FILE", errfile);
529 :
530 : // file with experimental overlaps
531 : std::string ovfile;
532 30 : parse("OV_FILE", ovfile);
533 :
534 : // integral of the experimetal density
535 30 : double norm_d = 0.0;
536 30 : parse("NORM_DENSITY", norm_d);
537 :
538 : // temperature
539 30 : kbt_ = getkBT();
540 :
541 : // exponent of uncertainty prior
542 30 : parse("PRIOR",prior_);
543 :
544 : // simulated annealing stuff
545 30 : parse("ANNEAL", nanneal_);
546 30 : parse("ANNEAL_FACT", kanneal_);
547 30 : if(nanneal_>0 && kanneal_<=1.0) {
548 0 : error("with ANNEAL, ANNEAL_FACT must be greater than 1");
549 : }
550 :
551 : // regression stride
552 30 : parse("REGRESSION",nregres_);
553 : // other regression parameters
554 30 : if(nregres_>0) {
555 0 : parse("REG_SCALE_MIN",scale_min_);
556 0 : parse("REG_SCALE_MAX",scale_max_);
557 0 : parse("REG_DSCALE",dscale_);
558 : // checks
559 0 : if(scale_max_<=scale_min_) {
560 0 : error("with REGRESSION, REG_SCALE_MAX must be greater than REG_SCALE_MIN");
561 : }
562 0 : if(dscale_<=0.) {
563 0 : error("with REGRESSION, REG_DSCALE must be positive");
564 : }
565 : }
566 :
567 : // scale factor
568 30 : parse("SCALE", scale_);
569 :
570 : // read map resolution
571 : double reso;
572 30 : parse("RESOLUTION", reso);
573 30 : if(reso<=0.) {
574 0 : error("RESOLUTION should be strictly positive");
575 : }
576 :
577 : // neighbor list stuff
578 30 : parse("NL_CUTOFF",nl_cutoff_);
579 30 : if(nl_cutoff_<=0.0) {
580 0 : error("NL_CUTOFF should be explicitly specified and positive");
581 : }
582 30 : parse("NL_STRIDE",nl_stride_);
583 30 : if(nl_stride_==0) {
584 0 : error("NL_STRIDE should be explicitly specified and positive");
585 : }
586 :
587 : // averaging or not
588 30 : parseFlag("NO_AVER",no_aver_);
589 :
590 : // write overlap file
591 30 : parse("WRITE_OV_STRIDE", ovstride_);
592 30 : parse("WRITE_OV", ovfilename_);
593 32 : if(ovstride_>0 && ovfilename_=="") {
594 0 : error("With WRITE_OV_STRIDE you must specify WRITE_OV");
595 : }
596 :
597 : // set parallel stuff
598 30 : size_=comm.Get_size();
599 30 : rank_=comm.Get_rank();
600 :
601 : // get number of replicas
602 30 : if(rank_==0) {
603 18 : if(no_aver_) {
604 16 : nrep_ = 1;
605 : } else {
606 2 : nrep_ = multi_sim_comm.Get_size();
607 : }
608 18 : replica_ = multi_sim_comm.Get_rank();
609 : } else {
610 12 : nrep_ = 0;
611 12 : replica_ = 0;
612 : }
613 30 : comm.Sum(&nrep_,1);
614 30 : comm.Sum(&replica_,1);
615 :
616 : // Reweighting flag
617 30 : parseFlag("REWEIGHT", do_reweight_);
618 30 : if(do_reweight_&&getNumberOfArguments()!=1) {
619 0 : error("To REWEIGHT one must provide one single bias as an argument");
620 : }
621 30 : if(do_reweight_&&no_aver_) {
622 0 : error("REWEIGHT cannot be used with NO_AVER");
623 : }
624 30 : if(do_reweight_&&nrep_<2) {
625 0 : error("REWEIGHT can only be used in parallel with 2 or more replicas");
626 : }
627 30 : if(!getRestart()) {
628 30 : average_weights_.resize(nrep_, 1./static_cast<double>(nrep_));
629 : } else {
630 0 : average_weights_.resize(nrep_, 0.);
631 : }
632 :
633 30 : unsigned averaging=0;
634 30 : parse("AVERAGING", averaging);
635 30 : if(averaging>0) {
636 2 : decay_w_ = 1./static_cast<double> (averaging);
637 : }
638 :
639 30 : checkRead();
640 :
641 30 : log.printf(" atoms involved : ");
642 16906 : for(unsigned i=0; i<atoms.size(); ++i) {
643 16876 : log.printf("%d ",atoms[i].serial());
644 : }
645 30 : log.printf("\n");
646 30 : log.printf(" GMM data file : %s\n", GMM_file.c_str());
647 30 : if(no_aver_) {
648 28 : log.printf(" without ensemble averaging\n");
649 : }
650 30 : log.printf(" type of data noise : %s\n", noise.c_str());
651 30 : log.printf(" neighbor list cutoff : %lf\n", nl_cutoff_);
652 30 : log.printf(" neighbor list stride : %u\n", nl_stride_);
653 30 : log.printf(" minimum uncertainty : %f\n",sigma_min);
654 30 : log.printf(" scale factor : %lf\n",scale_);
655 30 : if(nregres_>0) {
656 0 : log.printf(" regression stride : %u\n", nregres_);
657 0 : log.printf(" regression minimum scale : %lf\n", scale_min_);
658 0 : log.printf(" regression maximum scale : %lf\n", scale_max_);
659 0 : log.printf(" regression maximum scale MC move : %lf\n", dscale_);
660 : }
661 30 : if(noise_!=2) {
662 24 : log.printf(" initial value of the uncertainty : %f\n",sigma_ini);
663 24 : log.printf(" max MC move in uncertainty : %f\n",dsigma);
664 24 : log.printf(" MC stride : %u\n", MCstride_);
665 24 : log.printf(" reading/writing to status file : %s\n",statusfilename_.c_str());
666 24 : log.printf(" with stride : %u\n",statusstride_);
667 : }
668 30 : if(errfile.size()>0) {
669 0 : log.printf(" reading experimental errors from file : %s\n", errfile.c_str());
670 : }
671 30 : if(ovfile.size()>0) {
672 0 : log.printf(" reading experimental overlaps from file : %s\n", ovfile.c_str());
673 : }
674 30 : log.printf(" temperature of the system in energy unit : %f\n",kbt_);
675 30 : log.printf(" prior exponent : %f\n",prior_);
676 30 : log.printf(" number of replicas for averaging: %u\n",nrep_);
677 30 : log.printf(" id of the replica : %u\n",replica_);
678 30 : if(nanneal_>0) {
679 4 : log.printf(" length of annealing cycle : %u\n",nanneal_);
680 4 : log.printf(" annealing factor : %f\n",kanneal_);
681 : }
682 30 : if(ovstride_>0) {
683 2 : log.printf(" stride for writing model overlaps : %u\n",ovstride_);
684 2 : log.printf(" file for writing model overlaps : %s\n", ovfilename_.c_str());
685 : }
686 :
687 : // set constant quantity before calculating stuff
688 30 : cfact_ = 1.0/pow( 2.0*pi, 1.5 );
689 :
690 : // calculate model GMM constant parameters
691 30 : std::vector<double> GMM_m_w = get_GMM_m(atoms);
692 :
693 : // read data GMM parameters
694 30 : get_GMM_d(GMM_file);
695 30 : log.printf(" number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
696 :
697 : // normalize atom weight map
698 30 : if(norm_d <= 0.0) {
699 30 : norm_d = accumulate(GMM_d_w_.begin(), GMM_d_w_.end(), 0.0);
700 : }
701 : double norm_m = accumulate(GMM_m_w.begin(), GMM_m_w.end(), 0.0);
702 : // renormalization
703 150 : for(unsigned i=0; i<GMM_m_w_.size(); ++i) {
704 120 : GMM_m_w_[i] *= norm_d / norm_m;
705 : }
706 :
707 : // read experimental errors
708 : std::vector<double> exp_err;
709 30 : if(errfile.size()>0) {
710 0 : exp_err = read_exp_errors(errfile);
711 : }
712 :
713 : // get self overlaps between data GMM components
714 30 : if(ovfile.size()>0) {
715 0 : ovdd_ = read_exp_overlaps(ovfile);
716 : } else {
717 2082 : for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
718 2052 : double ov = get_self_overlap(i);
719 2052 : ovdd_.push_back(ov);
720 : }
721 : }
722 :
723 30 : log.printf(" number of GMM groups : %u\n", static_cast<unsigned>(GMM_d_grps_.size()));
724 : // cycle on GMM groups
725 60 : for(unsigned Gid=0; Gid<GMM_d_grps_.size(); ++Gid) {
726 30 : log.printf(" group %d\n", Gid);
727 : // calculate median overlap and experimental error
728 : std::vector<double> ovdd;
729 : std::vector<double> err;
730 : // cycle on the group members
731 2082 : for(unsigned i=0; i<GMM_d_grps_[Gid].size(); ++i) {
732 : // GMM id
733 2052 : int GMMid = GMM_d_grps_[Gid][i];
734 : // add to experimental error
735 2052 : if(errfile.size()>0) {
736 0 : err.push_back(exp_err[GMMid]);
737 : } else {
738 2052 : err.push_back(0.);
739 : }
740 : // add to GMM overlap
741 2052 : ovdd.push_back(ovdd_[GMMid]);
742 : }
743 : // calculate median quantities
744 30 : double ovdd_m = get_median(ovdd);
745 30 : double err_m = get_median(err);
746 : // print out statistics
747 30 : log.printf(" # of members : %zu\n", GMM_d_grps_[Gid].size());
748 30 : log.printf(" median overlap : %lf\n", ovdd_m);
749 30 : log.printf(" median error : %lf\n", err_m);
750 : // add minimum value of sigma for this group of GMMs
751 30 : sigma_min_.push_back(std::sqrt(err_m*err_m+sigma_min*ovdd_m*sigma_min*ovdd_m));
752 : // these are only needed with Gaussian and Outliers noise models
753 30 : if(noise_!=2) {
754 : // set dsigma
755 24 : dsigma_.push_back(dsigma * ovdd_m);
756 : // set sigma max
757 24 : sigma_max_.push_back(10.0*ovdd_m + sigma_min_[Gid] + dsigma_[Gid]);
758 : // initialize sigma
759 48 : sigma_.push_back(std::max(sigma_min_[Gid],std::min(sigma_ini*ovdd_m,sigma_max_[Gid])));
760 : }
761 : }
762 :
763 : // read status file if restarting
764 30 : if(getRestart() && noise_!=2) {
765 0 : read_status();
766 : }
767 :
768 : // calculate auxiliary stuff
769 30 : calculate_useful_stuff(reso);
770 :
771 : // prepare data and derivative std::vectors
772 30 : ovmd_.resize(ovdd_.size());
773 30 : ovmd_ave_.resize(ovdd_.size());
774 30 : atom_der_.resize(GMM_m_type_.size());
775 30 : GMMid_der_.resize(ovdd_.size());
776 :
777 : // clear things that are no longer needed
778 : GMM_d_cov_.clear();
779 :
780 : // add components
781 60 : addComponentWithDerivatives("scoreb");
782 30 : componentIsNotPeriodic("scoreb");
783 :
784 30 : if(noise_!=2) {
785 48 : addComponent("acc");
786 48 : componentIsNotPeriodic("acc");
787 : }
788 :
789 30 : if(nregres_>0) {
790 0 : addComponent("scale");
791 0 : componentIsNotPeriodic("scale");
792 0 : addComponent("accscale");
793 0 : componentIsNotPeriodic("accscale");
794 0 : addComponent("enescale");
795 0 : componentIsNotPeriodic("enescale");
796 : }
797 :
798 30 : if(nanneal_>0) {
799 8 : addComponent("anneal");
800 8 : componentIsNotPeriodic("anneal");
801 : }
802 :
803 30 : if(do_reweight_) {
804 4 : addComponent("biasDer");
805 4 : componentIsNotPeriodic("biasDer");
806 4 : addComponent("weight");
807 4 : componentIsNotPeriodic("weight");
808 : }
809 :
810 60 : addComponent("neff");
811 30 : componentIsNotPeriodic("neff");
812 :
813 54 : for(unsigned i=0; i<sigma_.size(); ++i) {
814 : std::string num;
815 24 : Tools::convert(i, num);
816 48 : addComponent("sigma-"+num);
817 24 : componentIsNotPeriodic("sigma-"+num);
818 48 : getPntrToComponent("sigma-"+num)->set(sigma_[i]);
819 : }
820 :
821 : // initialize random seed
822 : unsigned iseed;
823 30 : if(rank_==0) {
824 18 : iseed = time(NULL)+replica_;
825 : } else {
826 12 : iseed = 0;
827 : }
828 30 : comm.Sum(&iseed, 1);
829 30 : random_.setSeed(-iseed);
830 :
831 : // request the atoms
832 30 : requestAtoms(atoms);
833 30 : setDerivatives();
834 :
835 : // print bibliography
836 60 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
837 60 : log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
838 60 : log<<plumed.cite("Bonomi, Pellarin, Vendruscolo, Biophys. J. 114, 1604 (2018)");
839 30 : if(do_reweight_) {
840 4 : log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
841 : }
842 30 : if(!no_aver_ && nrep_>1) {
843 4 : log<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
844 : }
845 30 : log<<"\n";
846 30 : }
847 :
848 4 : void EMMI::write_model_overlap(long long int step) {
849 4 : OFile ovfile;
850 4 : ovfile.link(*this);
851 : std::string num;
852 4 : Tools::convert(step,num);
853 4 : std::string name = ovfilename_+"-"+num;
854 4 : ovfile.open(name);
855 : ovfile.setHeavyFlush();
856 4 : ovfile.fmtField("%10.7e ");
857 : // write overlaps
858 40 : for(int i=0; i<ovmd_.size(); ++i) {
859 36 : ovfile.printField("Model", ovmd_[i]);
860 72 : ovfile.printField("ModelScaled", scale_ * ovmd_[i]);
861 36 : ovfile.printField("Data", ovdd_[i]);
862 36 : ovfile.printField();
863 : }
864 4 : ovfile.close();
865 4 : }
866 :
867 60 : double EMMI::get_median(std::vector<double> &v) {
868 : // dimension of std::vector
869 60 : unsigned size = v.size();
870 : // in case of only one entry
871 60 : if (size==1) {
872 0 : return v[0];
873 : } else {
874 : // reorder std::vector
875 60 : sort(v.begin(), v.end());
876 : // odd or even?
877 60 : if (size%2==0) {
878 4 : return (v[size/2-1]+v[size/2])/2.0;
879 : } else {
880 56 : return v[size/2];
881 : }
882 : }
883 : }
884 :
885 0 : void EMMI::read_status() {
886 : double MDtime;
887 : // open file
888 : auto ifile = Tools::make_unique<IFile>();
889 0 : ifile->link(*this);
890 0 : if(ifile->FileExist(statusfilename_)) {
891 0 : ifile->open(statusfilename_);
892 0 : while(ifile->scanField("MD_time", MDtime)) {
893 0 : for(unsigned i=0; i<sigma_.size(); ++i) {
894 : // convert i to std::string
895 : std::string num;
896 0 : Tools::convert(i,num);
897 : // read entries
898 0 : ifile->scanField("s"+num, sigma_[i]);
899 : }
900 : // new line
901 0 : ifile->scanField();
902 : }
903 0 : ifile->close();
904 : } else {
905 0 : error("Cannot find status file "+statusfilename_+"\n");
906 : }
907 0 : }
908 :
909 12729 : void EMMI::print_status(long long int step) {
910 : // if first time open the file
911 12729 : if(first_status_) {
912 24 : first_status_ = false;
913 24 : statusfile_.link(*this);
914 24 : statusfile_.open(statusfilename_);
915 : statusfile_.setHeavyFlush();
916 48 : statusfile_.fmtField("%6.3e ");
917 : }
918 : // write fields
919 12729 : double MDtime = static_cast<double>(step)*getTimeStep();
920 12729 : statusfile_.printField("MD_time", MDtime);
921 25458 : for(unsigned i=0; i<sigma_.size(); ++i) {
922 : // convert i to std::string
923 : std::string num;
924 12729 : Tools::convert(i,num);
925 : // print entry
926 25458 : statusfile_.printField("s"+num, sigma_[i]);
927 : }
928 12729 : statusfile_.printField();
929 12729 : }
930 :
931 0 : bool EMMI::doAccept(double oldE, double newE, double kbt) {
932 : bool accept = false;
933 : // calculate delta energy
934 0 : double delta = ( newE - oldE ) / kbt;
935 : // if delta is negative always accept move
936 0 : if( delta < 0.0 ) {
937 : accept = true;
938 : } else {
939 : // otherwise extract random number
940 0 : double s = random_.RandU01();
941 0 : if( s < std::exp(-delta) ) {
942 : accept = true;
943 : }
944 : }
945 0 : return accept;
946 : }
947 :
948 0 : void EMMI::doMonteCarlo() {
949 : // extract random GMM group
950 0 : unsigned nGMM = static_cast<unsigned>(std::floor(random_.RandU01()*static_cast<double>(GMM_d_grps_.size())));
951 0 : if(nGMM==GMM_d_grps_.size()) {
952 0 : nGMM -= 1;
953 : }
954 :
955 : // generate random move
956 0 : double shift = dsigma_[nGMM] * ( 2.0 * random_.RandU01() - 1.0 );
957 : // new sigma
958 0 : double new_s = sigma_[nGMM] + shift;
959 : // check boundaries
960 0 : if(new_s > sigma_max_[nGMM]) {
961 0 : new_s = 2.0 * sigma_max_[nGMM] - new_s;
962 : }
963 0 : if(new_s < sigma_min_[nGMM]) {
964 0 : new_s = 2.0 * sigma_min_[nGMM] - new_s;
965 : }
966 : // old s2
967 0 : double old_inv_s2 = 1.0 / sigma_[nGMM] / sigma_[nGMM];
968 : // new s2
969 0 : double new_inv_s2 = 1.0 / new_s / new_s;
970 :
971 : // cycle on GMM group and calculate old and new energy
972 : double old_ene = 0.0;
973 : double new_ene = 0.0;
974 0 : double ng = static_cast<double>(GMM_d_grps_[nGMM].size());
975 :
976 : // in case of Gaussian noise
977 0 : if(noise_==0) {
978 : double chi2 = 0.0;
979 0 : for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
980 : // id GMM component
981 0 : int GMMid = GMM_d_grps_[nGMM][i];
982 : // deviation
983 0 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
984 : // add to chi2
985 0 : chi2 += dev * dev;
986 : }
987 : // final energy calculation: add normalization and prior
988 0 : old_ene = 0.5 * kbt_ * ( chi2 * old_inv_s2 - (ng+prior_) * std::log(old_inv_s2) );
989 0 : new_ene = 0.5 * kbt_ * ( chi2 * new_inv_s2 - (ng+prior_) * std::log(new_inv_s2) );
990 : }
991 :
992 : // in case of Outliers noise
993 0 : if(noise_==1) {
994 0 : for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
995 : // id GMM component
996 0 : int GMMid = GMM_d_grps_[nGMM][i];
997 : // calculate deviation
998 0 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
999 : // add to energies
1000 0 : old_ene += std::log1p( 0.5 * dev * dev * old_inv_s2);
1001 0 : new_ene += std::log1p( 0.5 * dev * dev * new_inv_s2);
1002 : }
1003 : // final energy calculation: add normalization and prior
1004 0 : old_ene = kbt_ * ( old_ene + (ng+prior_) * std::log(sigma_[nGMM]) );
1005 0 : new_ene = kbt_ * ( new_ene + (ng+prior_) * std::log(new_s) );
1006 : }
1007 :
1008 : // increment number of trials
1009 0 : MCtrials_ += 1.0;
1010 :
1011 : // accept or reject
1012 0 : bool accept = doAccept(old_ene/anneal_, new_ene/anneal_, kbt_);
1013 0 : if(accept) {
1014 0 : sigma_[nGMM] = new_s;
1015 0 : MCaccept_ += 1.0;
1016 : }
1017 : // local communication
1018 0 : if(rank_!=0) {
1019 0 : for(unsigned i=0; i<sigma_.size(); ++i) {
1020 0 : sigma_[i] = 0.0;
1021 : }
1022 0 : MCaccept_ = 0.0;
1023 : }
1024 0 : if(size_>1) {
1025 0 : comm.Sum(&sigma_[0], sigma_.size());
1026 0 : comm.Sum(&MCaccept_, 1);
1027 : }
1028 :
1029 : // update sigma output
1030 : std::string num;
1031 0 : Tools::convert(nGMM, num);
1032 0 : getPntrToComponent("sigma-"+num)->set(sigma_[nGMM]);
1033 0 : }
1034 :
1035 0 : std::vector<double> EMMI::read_exp_errors(const std::string & errfile) {
1036 : int nexp, idcomp;
1037 : double err;
1038 : std::vector<double> exp_err;
1039 : // open file
1040 0 : IFile *ifile = new IFile();
1041 0 : if(ifile->FileExist(errfile)) {
1042 0 : ifile->open(errfile);
1043 : // scan for number of experimental errors
1044 0 : ifile->scanField("Nexp", nexp);
1045 : // cycle on GMM components
1046 0 : while(ifile->scanField("Id",idcomp)) {
1047 : // total experimental error
1048 0 : double err_tot = 0.0;
1049 : // cycle on number of experimental overlaps
1050 0 : for(unsigned i=0; i<nexp; ++i) {
1051 : std::string ss;
1052 0 : Tools::convert(i,ss);
1053 0 : ifile->scanField("Err"+ss, err);
1054 : // add to total error
1055 0 : err_tot += err*err;
1056 : }
1057 : // new line
1058 0 : ifile->scanField();
1059 : // calculate RMSE
1060 0 : err_tot = std::sqrt(err_tot/static_cast<double>(nexp));
1061 : // add to global
1062 0 : exp_err.push_back(err_tot);
1063 : }
1064 0 : ifile->close();
1065 : } else {
1066 0 : error("Cannot find ERR_FILE "+errfile+"\n");
1067 : }
1068 0 : return exp_err;
1069 : }
1070 :
1071 0 : std::vector<double> EMMI::read_exp_overlaps(const std::string & ovfile) {
1072 : int idcomp;
1073 : double ov;
1074 : std::vector<double> ovdd;
1075 : // open file
1076 0 : IFile *ifile = new IFile();
1077 0 : if(ifile->FileExist(ovfile)) {
1078 0 : ifile->open(ovfile);
1079 : // cycle on GMM components
1080 0 : while(ifile->scanField("Id",idcomp)) {
1081 : // read experimental overlap
1082 0 : ifile->scanField("Overlap", ov);
1083 : // add to ovdd
1084 0 : ovdd.push_back(ov);
1085 : // new line
1086 0 : ifile->scanField();
1087 : }
1088 0 : ifile->close();
1089 : } else {
1090 0 : error("Cannot find OV_FILE "+ovfile+"\n");
1091 : }
1092 0 : return ovdd;
1093 : }
1094 :
1095 30 : std::vector<double> EMMI::get_GMM_m(std::vector<AtomNumber> &atoms) {
1096 : // list of weights - one per atom
1097 : std::vector<double> GMM_m_w;
1098 :
1099 30 : auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
1100 : // map of atom types to A and B coefficients of scattering factor
1101 : // f(s) = A * exp(-B*s**2)
1102 : // B is in Angstrom squared
1103 : // map between an atom type and an index
1104 : std::map<std::string, unsigned> type_map;
1105 30 : type_map["C"]=0;
1106 30 : type_map["O"]=1;
1107 30 : type_map["N"]=2;
1108 30 : type_map["S"]=3;
1109 : // fill in sigma std::vector
1110 30 : GMM_m_s_.push_back(0.01*15.146); // type 0
1111 30 : GMM_m_s_.push_back(0.01*8.59722); // type 1
1112 30 : GMM_m_s_.push_back(0.01*11.1116); // type 2
1113 30 : GMM_m_s_.push_back(0.01*15.8952); // type 3
1114 : // fill in weight std::vector
1115 30 : GMM_m_w_.push_back(2.49982); // type 0
1116 30 : GMM_m_w_.push_back(1.97692); // type 1
1117 30 : GMM_m_w_.push_back(2.20402); // type 2
1118 30 : GMM_m_w_.push_back(5.14099); // type 3
1119 :
1120 : // check if MOLINFO line is present
1121 30 : if( moldat ) {
1122 30 : log<<" MOLINFO DATA found with label " <<moldat->getLabel()<<", using proper atom names\n";
1123 16906 : for(unsigned i=0; i<atoms.size(); ++i) {
1124 : // get atom name
1125 16876 : std::string name = moldat->getAtomName(atoms[i]);
1126 : char type;
1127 : // get atom type
1128 16876 : char first = name.at(0);
1129 : // GOLDEN RULE: type is first letter, if not a number
1130 16876 : if (!isdigit(first)) {
1131 : type = first;
1132 : // otherwise is the second
1133 : } else {
1134 0 : type = name.at(1);
1135 : }
1136 : // check if key in map
1137 16876 : std::string type_s = std::string(1,type);
1138 16876 : if(type_map.find(type_s) != type_map.end()) {
1139 : // save atom type
1140 16876 : GMM_m_type_.push_back(type_map[type_s]);
1141 : // this will be normalized in the final density
1142 16876 : GMM_m_w.push_back(GMM_m_w_[type_map[type_s]]);
1143 : } else {
1144 0 : error("Wrong atom type "+type_s+" from atom name "+name+"\n");
1145 : }
1146 : }
1147 : } else {
1148 0 : error("MOLINFO DATA not found\n");
1149 : }
1150 30 : return GMM_m_w;
1151 : }
1152 :
1153 2052 : void EMMI::check_GMM_d(const VectorGeneric<6> &cov, double w) {
1154 :
1155 : // check if positive defined, by calculating the 3 leading principal minors
1156 2052 : double pm1 = cov[0];
1157 2052 : double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
1158 2052 : double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
1159 : // apply Sylvester’s criterion
1160 2052 : if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0) {
1161 0 : error("check data GMM: covariance matrix is not positive defined");
1162 : }
1163 :
1164 : // check if weight is positive
1165 2052 : if(w<=0) {
1166 0 : error("check data GMM: weight must be positive");
1167 : }
1168 2052 : }
1169 :
1170 : // read GMM data file in PLUMED format:
1171 30 : void EMMI::get_GMM_d(const std::string & GMM_file) {
1172 30 : VectorGeneric<6> cov;
1173 :
1174 : // open file
1175 : auto ifile=Tools::make_unique<IFile>();
1176 30 : if(ifile->FileExist(GMM_file)) {
1177 30 : ifile->open(GMM_file);
1178 : int idcomp;
1179 4164 : while(ifile->scanField("Id",idcomp)) {
1180 : int beta;
1181 : double w, m0, m1, m2;
1182 4104 : ifile->scanField("Weight",w);
1183 4104 : ifile->scanField("Mean_0",m0);
1184 4104 : ifile->scanField("Mean_1",m1);
1185 4104 : ifile->scanField("Mean_2",m2);
1186 4104 : ifile->scanField("Cov_00",cov[0]);
1187 4104 : ifile->scanField("Cov_01",cov[1]);
1188 4104 : ifile->scanField("Cov_02",cov[2]);
1189 4104 : ifile->scanField("Cov_11",cov[3]);
1190 4104 : ifile->scanField("Cov_12",cov[4]);
1191 4104 : ifile->scanField("Cov_22",cov[5]);
1192 2052 : ifile->scanField("Beta",beta);
1193 : // check input
1194 2052 : check_GMM_d(cov, w);
1195 : // check beta
1196 2052 : if(beta<0) {
1197 0 : error("Beta must be positive!");
1198 : }
1199 : // center of the Gaussian
1200 2052 : GMM_d_m_.push_back(Vector(m0,m1,m2));
1201 : // covariance matrix
1202 2052 : GMM_d_cov_.push_back(cov);
1203 : // weight
1204 2052 : GMM_d_w_.push_back(w);
1205 : // beta
1206 2052 : GMM_d_beta_.push_back(beta);
1207 : // new line
1208 2052 : ifile->scanField();
1209 : }
1210 : } else {
1211 0 : error("Cannot find GMM_FILE "+GMM_file+"\n");
1212 : }
1213 : // now create a set from beta (unique set of values)
1214 30 : std::set<int> bu(GMM_d_beta_.begin(), GMM_d_beta_.end());
1215 : // now prepare the group std::vector
1216 30 : GMM_d_grps_.resize(bu.size());
1217 : // and fill it in
1218 2082 : for(unsigned i=0; i<GMM_d_beta_.size(); ++i) {
1219 2052 : if(GMM_d_beta_[i]>=GMM_d_grps_.size()) {
1220 0 : error("Check Beta values");
1221 : }
1222 2052 : GMM_d_grps_[GMM_d_beta_[i]].push_back(i);
1223 : }
1224 30 : }
1225 :
1226 30 : void EMMI::calculate_useful_stuff(double reso) {
1227 : // We use the following definition for resolution:
1228 : // the Fourier transform of the density distribution in real space
1229 : // f(s) falls to 1/e of its maximum value at wavenumber 1/resolution
1230 : // i.e. from f(s) = A * exp(-B*s**2) -> Res = std::sqrt(B).
1231 : // average value of B
1232 : double Bave = 0.0;
1233 16906 : for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
1234 16876 : Bave += GMM_m_s_[GMM_m_type_[i]];
1235 : }
1236 30 : Bave /= static_cast<double>(GMM_m_type_.size());
1237 : // calculate blur factor
1238 : double blur = 0.0;
1239 30 : if(reso*reso>Bave) {
1240 2 : blur = reso*reso-Bave;
1241 : } else {
1242 56 : warning("PLUMED should not be used with maps at resolution better than 0.3 nm");
1243 : }
1244 : // add blur to B
1245 150 : for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
1246 120 : GMM_m_s_[i] += blur;
1247 : }
1248 : // calculate average resolution
1249 : double ave_res = 0.0;
1250 16906 : for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
1251 16876 : ave_res += std::sqrt(GMM_m_s_[GMM_m_type_[i]]);
1252 : }
1253 30 : ave_res = ave_res / static_cast<double>(GMM_m_type_.size());
1254 30 : log.printf(" experimental map resolution : %3.2f\n", reso);
1255 30 : log.printf(" predicted map resolution : %3.2f\n", ave_res);
1256 30 : log.printf(" blur factor : %f\n", blur);
1257 : // now calculate useful stuff
1258 30 : VectorGeneric<6> cov, sum, inv_sum;
1259 : // cycle on all atoms types (4 for the moment)
1260 150 : for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
1261 : // the Gaussian in density (real) space is the FT of scattering factor
1262 : // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
1263 120 : double s = std::sqrt ( 0.5 * GMM_m_s_[i] ) / pi;
1264 : // covariance matrix for spherical Gaussian
1265 120 : cov[0]=s*s;
1266 120 : cov[1]=0.0;
1267 120 : cov[2]=0.0;
1268 120 : cov[3]=s*s;
1269 120 : cov[4]=0.0;
1270 120 : cov[5]=s*s;
1271 : // cycle on all data GMM
1272 8328 : for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
1273 : // we need the sum of the covariance matrices
1274 57456 : for(unsigned k=0; k<6; ++k) {
1275 49248 : sum[k] = cov[k] + GMM_d_cov_[j][k];
1276 : }
1277 : // and to calculate its determinant
1278 8208 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
1279 8208 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
1280 8208 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
1281 : // calculate prefactor - model weights are already normalized
1282 8208 : double pre_fact = cfact_ / std::sqrt(det) * GMM_d_w_[j] * GMM_m_w_[i];
1283 : // and its inverse
1284 8208 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
1285 8208 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
1286 8208 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
1287 8208 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
1288 8208 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
1289 8208 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
1290 : // now we store the prefactor
1291 8208 : pre_fact_.push_back(pre_fact);
1292 : // and the inverse of the sum
1293 8208 : inv_cov_md_.push_back(inv_sum);
1294 : }
1295 : }
1296 : // tabulate exponential
1297 30 : dexp_ = dpcutoff_ / static_cast<double> (nexp_-1);
1298 30000030 : for(unsigned i=0; i<nexp_; ++i) {
1299 30000000 : tab_exp_.push_back(std::exp(-static_cast<double>(i) * dexp_));
1300 : }
1301 30 : }
1302 :
1303 : // get prefactors
1304 1622268 : double EMMI::get_prefactor_inverse
1305 : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
1306 : double GMM_w_0, double GMM_w_1,
1307 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum) {
1308 : // we need the sum of the covariance matrices
1309 11355876 : for(unsigned k=0; k<6; ++k) {
1310 9733608 : sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
1311 : }
1312 :
1313 : // and to calculate its determinant
1314 1622268 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
1315 1622268 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
1316 1622268 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
1317 :
1318 : // the prefactor is
1319 1622268 : double pre_fact = cfact_ / std::sqrt(det) * GMM_w_0 * GMM_w_1;
1320 :
1321 : // and its inverse
1322 1622268 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
1323 1622268 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
1324 1622268 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
1325 1622268 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
1326 1622268 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
1327 1622268 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
1328 :
1329 : // return pre-factor
1330 1622268 : return pre_fact;
1331 : }
1332 :
1333 2052 : double EMMI::get_self_overlap(unsigned id) {
1334 : double ov_tot = 0.0;
1335 2052 : VectorGeneric<6> sum, inv_sum;
1336 2052 : Vector ov_der;
1337 : // start loop
1338 1624320 : for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
1339 : // call auxiliary method
1340 1622268 : double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
1341 1622268 : GMM_d_w_[id], GMM_d_w_[i], sum, inv_sum);
1342 : // add overlap to ov_tot
1343 1622268 : ov_tot += get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
1344 : }
1345 : // and return it
1346 2052 : return ov_tot;
1347 : }
1348 :
1349 : // get overlap and derivatives
1350 3737255 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
1351 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der) {
1352 3737255 : Vector md;
1353 : // calculate std::vector difference m_m-d_m with/without pbc
1354 3737255 : if(pbc_) {
1355 0 : md = pbcDistance(d_m, m_m);
1356 : } else {
1357 3737255 : md = delta(d_m, m_m);
1358 : }
1359 : // calculate product of transpose of md and inv_cov_md
1360 3737255 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
1361 3737255 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
1362 3737255 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
1363 : // calculate product of prod and md
1364 3737255 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
1365 : // final calculation
1366 3737255 : ov = pre_fact * std::exp(-0.5*ov);
1367 : // derivatives
1368 3737255 : ov_der = ov * Vector(p_x, p_y, p_z);
1369 3737255 : return ov;
1370 : }
1371 :
1372 : // get the exponent of the overlap
1373 59106708 : double EMMI::get_exp_overlap(const Vector &m_m, const Vector &d_m,
1374 : const VectorGeneric<6> &inv_cov_md) {
1375 59106708 : Vector md;
1376 : // calculate std::vector difference m_m-d_m with/without pbc
1377 59106708 : if(pbc_) {
1378 0 : md = pbcDistance(d_m, m_m);
1379 : } else {
1380 59106708 : md = delta(d_m, m_m);
1381 : }
1382 : // calculate product of transpose of md and inv_cov_md
1383 59106708 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
1384 59106708 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
1385 59106708 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
1386 : // calculate product of prod and md
1387 59106708 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
1388 59106708 : return ov;
1389 : }
1390 :
1391 18180 : void EMMI::update_neighbor_list() {
1392 : // dimension of GMM and atom std::vectors
1393 18180 : unsigned GMM_d_size = GMM_d_m_.size();
1394 18180 : unsigned GMM_m_size = GMM_m_type_.size();
1395 : // local neighbor list
1396 : std::vector < unsigned > nl_l;
1397 : // clear old neighbor list
1398 18180 : nl_.clear();
1399 :
1400 : // cycle on GMM components - in parallel
1401 118134 : for(unsigned id=rank_; id<GMM_d_size; id+=size_) {
1402 : // overlap lists and map
1403 : std::vector<double> ov_l;
1404 : std::map<double, unsigned> ov_m;
1405 : // total overlap with id
1406 : double ov_tot = 0.0;
1407 : // cycle on all atoms
1408 59206662 : for(unsigned im=0; im<GMM_m_size; ++im) {
1409 : // get index in auxiliary lists
1410 59106708 : unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
1411 : // calculate exponent of overlap
1412 59106708 : double expov = get_exp_overlap(GMM_d_m_[id], getPosition(im), inv_cov_md_[kaux]);
1413 : // get index of 0.5*expov in tabulated exponential
1414 59106708 : unsigned itab = static_cast<unsigned> (round( 0.5*expov/dexp_ ));
1415 : // check boundaries and skip atom in case
1416 59106708 : if(itab >= tab_exp_.size()) {
1417 54971808 : continue;
1418 : }
1419 : // in case calculate overlap
1420 4134900 : double ov = pre_fact_[kaux] * tab_exp_[itab];
1421 : // add to list
1422 4134900 : ov_l.push_back(ov);
1423 : // and map to retrieve atom index
1424 4134900 : ov_m[ov] = im;
1425 : // increase ov_tot
1426 4134900 : ov_tot += ov;
1427 : }
1428 : // check if zero size -> ov_tot = 0
1429 99954 : if(ov_l.size()==0) {
1430 : continue;
1431 : }
1432 : // define cutoff
1433 99878 : double ov_cut = ov_tot * nl_cutoff_;
1434 : // sort ov_l in ascending order
1435 99878 : std::sort(ov_l.begin(), ov_l.end());
1436 : // integrate ov_l
1437 : double res = 0.0;
1438 2146886 : for(unsigned i=0; i<ov_l.size(); ++i) {
1439 2146886 : res += ov_l[i];
1440 : // if exceeding the cutoff for overlap, stop
1441 2146886 : if(res >= ov_cut) {
1442 : break;
1443 : } else {
1444 : ov_m.erase(ov_l[i]);
1445 : }
1446 : }
1447 : // now add atoms to neighborlist
1448 2187770 : for(std::map<double, unsigned>::iterator it=ov_m.begin(); it!=ov_m.end(); ++it) {
1449 2087892 : nl_l.push_back(id*GMM_m_size+it->second);
1450 : }
1451 : // end cycle on GMM components in parallel
1452 : }
1453 : // find total dimension of neighborlist
1454 18180 : std::vector <int> recvcounts(size_, 0);
1455 18180 : recvcounts[rank_] = nl_l.size();
1456 18180 : comm.Sum(&recvcounts[0], size_);
1457 : int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
1458 : // resize neighbor stuff
1459 18180 : nl_.resize(tot_size);
1460 : // calculate std::vector of displacement
1461 18180 : std::vector<int> disp(size_);
1462 18180 : disp[0] = 0;
1463 : int rank_size = 0;
1464 32724 : for(unsigned i=0; i<size_-1; ++i) {
1465 14544 : rank_size += recvcounts[i];
1466 14544 : disp[i+1] = rank_size;
1467 : }
1468 : // Allgather neighbor list
1469 18180 : comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
1470 : // now resize derivatives
1471 18180 : ovmd_der_.resize(tot_size);
1472 18180 : }
1473 :
1474 70 : void EMMI::prepare() {
1475 70 : if(getExchangeStep()) {
1476 0 : first_time_=true;
1477 : }
1478 70 : }
1479 :
1480 : // overlap calculator
1481 18220 : void EMMI::calculate_overlap() {
1482 :
1483 18220 : if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
1484 18180 : update_neighbor_list();
1485 18180 : first_time_=false;
1486 : }
1487 :
1488 : // clean temporary std::vectors
1489 192892 : for(unsigned i=0; i<ovmd_.size(); ++i) {
1490 174672 : ovmd_[i] = 0.0;
1491 : }
1492 3525024 : for(unsigned i=0; i<ovmd_der_.size(); ++i) {
1493 3506804 : ovmd_der_[i] = Vector(0,0,0);
1494 : }
1495 :
1496 : // we have to cycle over all model and data GMM components in the neighbor list
1497 18220 : unsigned GMM_d_size = GMM_d_m_.size();
1498 18220 : unsigned GMM_m_size = GMM_m_type_.size();
1499 2133207 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
1500 : // get data (id) and atom (im) indexes
1501 2114987 : unsigned id = nl_[i] / GMM_m_size;
1502 2114987 : unsigned im = nl_[i] % GMM_m_size;
1503 : // get index in auxiliary lists
1504 2114987 : unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
1505 : // add overlap with im component of model GMM
1506 2114987 : ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact_[kaux],
1507 2114987 : inv_cov_md_[kaux], ovmd_der_[i]);
1508 : }
1509 : // communicate stuff
1510 18220 : if(size_>1) {
1511 14574 : comm.Sum(&ovmd_[0], ovmd_.size());
1512 14574 : comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
1513 : }
1514 18220 : }
1515 :
1516 0 : double EMMI::scaleEnergy(double s) {
1517 : double ene = 0.0;
1518 0 : for(unsigned i=0; i<ovdd_.size(); ++i) {
1519 0 : ene += std::log( abs ( s * ovmd_ave_[i] - ovdd_[i] ) );
1520 : }
1521 0 : return ene;
1522 : }
1523 :
1524 0 : double EMMI::doRegression() {
1525 : // standard MC parameters
1526 : unsigned MCsteps = 100000;
1527 : double kbtmin = 1.0;
1528 : double kbtmax = 10.0;
1529 : unsigned ncold = 5000;
1530 : unsigned nhot = 2000;
1531 : double MCacc = 0.0;
1532 : double kbt, ebest, scale_best;
1533 :
1534 : // initial value of scale factor and energy
1535 0 : double scale = random_.RandU01() * ( scale_max_ - scale_min_ ) + scale_min_;
1536 0 : double ene = scaleEnergy(scale);
1537 : // set best energy
1538 0 : ebest = ene;
1539 :
1540 : // MC loop
1541 0 : for(unsigned istep=0; istep<MCsteps; ++istep) {
1542 : // get temperature
1543 0 : if(istep%(ncold+nhot)<ncold) {
1544 : kbt = kbtmin;
1545 : } else {
1546 : kbt = kbtmax;
1547 : }
1548 : // propose move in scale
1549 0 : double ds = dscale_ * ( 2.0 * random_.RandU01() - 1.0 );
1550 0 : double new_scale = scale + ds;
1551 : // check boundaries
1552 0 : if(new_scale > scale_max_) {
1553 0 : new_scale = 2.0 * scale_max_ - new_scale;
1554 : }
1555 0 : if(new_scale < scale_min_) {
1556 0 : new_scale = 2.0 * scale_min_ - new_scale;
1557 : }
1558 : // new energy
1559 0 : double new_ene = scaleEnergy(new_scale);
1560 : // accept or reject
1561 0 : bool accept = doAccept(ene, new_ene, kbt);
1562 : // in case of acceptance
1563 0 : if(accept) {
1564 : scale = new_scale;
1565 : ene = new_ene;
1566 0 : MCacc += 1.0;
1567 : }
1568 : // save best
1569 0 : if(ene<ebest) {
1570 0 : ebest = ene;
1571 0 : scale_best = scale;
1572 : }
1573 : }
1574 : // calculate acceptance
1575 0 : double accscale = MCacc / static_cast<double>(MCsteps);
1576 : // global communication
1577 0 : if(!no_aver_ && nrep_>1) {
1578 0 : if(replica_!=0) {
1579 0 : scale_best = 0.0;
1580 0 : ebest = 0.0;
1581 0 : accscale = 0.0;
1582 : }
1583 0 : if(rank_==0) {
1584 0 : multi_sim_comm.Sum(&scale_best, 1);
1585 0 : multi_sim_comm.Sum(&ebest, 1);
1586 0 : multi_sim_comm.Sum(&accscale, 1);
1587 : }
1588 : }
1589 : // local communication
1590 0 : if(rank_!=0) {
1591 0 : scale_best = 0.0;
1592 0 : ebest = 0.0;
1593 0 : accscale = 0.0;
1594 : }
1595 0 : if(size_>1) {
1596 0 : comm.Sum(&scale_best, 1);
1597 0 : comm.Sum(&ebest, 1);
1598 0 : comm.Sum(&accscale, 1);
1599 : }
1600 : // set scale parameters
1601 0 : getPntrToComponent("accscale")->set(accscale);
1602 0 : getPntrToComponent("enescale")->set(ebest);
1603 : // return scale value
1604 0 : return scale_best;
1605 : }
1606 :
1607 20 : double EMMI::get_annealing(long long int step) {
1608 : // default no annealing
1609 : double fact = 1.0;
1610 : // position in annealing cycle
1611 20 : unsigned nc = step%(4*nanneal_);
1612 : // useful doubles
1613 20 : double ncd = static_cast<double>(nc);
1614 20 : double nn = static_cast<double>(nanneal_);
1615 : // set fact
1616 20 : if(nc>=nanneal_ && nc<2*nanneal_) {
1617 6 : fact = (kanneal_-1.0) / nn * ( ncd - nn ) + 1.0;
1618 : }
1619 20 : if(nc>=2*nanneal_ && nc<3*nanneal_) {
1620 4 : fact = kanneal_;
1621 : }
1622 20 : if(nc>=3*nanneal_) {
1623 2 : fact = (1.0-kanneal_) / nn * ( ncd - 3.0*nn) + kanneal_;
1624 : }
1625 20 : return fact;
1626 : }
1627 :
1628 18220 : void EMMI::get_weights(double &weight, double &norm, double &neff) {
1629 18220 : const double dnrep = static_cast<double>(nrep_);
1630 : // calculate the weights either from BIAS
1631 18220 : if(do_reweight_) {
1632 12 : std::vector<double> bias(nrep_,0);
1633 12 : if(rank_==0) {
1634 12 : bias[replica_] = getArgument(0);
1635 12 : if(nrep_>1) {
1636 12 : multi_sim_comm.Sum(&bias[0], nrep_);
1637 : }
1638 : }
1639 12 : comm.Sum(&bias[0], nrep_);
1640 :
1641 : // accumulate weights
1642 12 : if(!first_time_w_) {
1643 30 : for(unsigned i=0; i<nrep_; ++i) {
1644 20 : const double delta=bias[i]-average_weights_[i];
1645 : // FIXME: multiplying by fractional decay here causes problems with numerical derivatives,
1646 : // probably because we're making several calls to calculate(), causing accumulation
1647 : // of epsilons. Maybe we can work on a temporary copy of the action instead?
1648 20 : average_weights_[i]+=decay_w_*delta;
1649 : }
1650 : } else {
1651 2 : first_time_w_ = false;
1652 6 : for(unsigned i=0; i<nrep_; ++i) {
1653 4 : average_weights_[i] = bias[i];
1654 : }
1655 : }
1656 :
1657 : // set average back into bias and set norm to one
1658 12 : const double maxbias = *(std::max_element(average_weights_.begin(), average_weights_.end()));
1659 36 : for(unsigned i=0; i<nrep_; ++i) {
1660 24 : bias[i] = std::exp((average_weights_[i]-maxbias)/kbt_);
1661 : }
1662 : // set local weight, norm and weight variance
1663 12 : weight = bias[replica_];
1664 : double w2=0.;
1665 36 : for(unsigned i=0; i<nrep_; ++i) {
1666 24 : w2 += bias[i]*bias[i];
1667 24 : norm += bias[i];
1668 : }
1669 12 : neff = norm*norm/w2;
1670 24 : getPntrToComponent("weight")->set(weight/norm);
1671 : } else {
1672 : // or arithmetic ones
1673 18208 : neff = dnrep;
1674 18208 : weight = 1.0;
1675 18208 : norm = dnrep;
1676 : }
1677 18220 : getPntrToComponent("neff")->set(neff);
1678 18220 : }
1679 :
1680 18220 : void EMMI::calculate() {
1681 :
1682 : // calculate CV
1683 18220 : calculate_overlap();
1684 :
1685 : // rescale factor for ensemble average
1686 18220 : double weight = 0.;
1687 18220 : double neff = 0.;
1688 18220 : double norm = 0.;
1689 18220 : get_weights(weight, norm, neff);
1690 :
1691 : // in case of ensemble averaging, calculate average overlap
1692 18220 : if(!no_aver_ && nrep_>1) {
1693 : // if master node, calculate average across replicas
1694 12 : if(rank_==0) {
1695 10812 : for(unsigned i=0; i<ovmd_.size(); ++i) {
1696 10800 : ovmd_ave_[i] = weight / norm * ovmd_[i];
1697 : }
1698 12 : multi_sim_comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
1699 : } else {
1700 0 : for(unsigned i=0; i<ovmd_ave_.size(); ++i) {
1701 0 : ovmd_ave_[i] = 0.0;
1702 : }
1703 : }
1704 : // local communication
1705 12 : if(size_>1) {
1706 0 : comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
1707 : }
1708 : } else {
1709 182080 : for(unsigned i=0; i<ovmd_.size(); ++i) {
1710 163872 : ovmd_ave_[i] = ovmd_[i];
1711 : }
1712 : }
1713 :
1714 : // get time step
1715 18220 : long long int step = getStep();
1716 :
1717 : // do regression
1718 18220 : if(nregres_>0) {
1719 0 : if(step%nregres_==0 && !getExchangeStep()) {
1720 0 : scale_ = doRegression();
1721 : }
1722 : // set scale component
1723 0 : getPntrToComponent("scale")->set(scale_);
1724 : }
1725 :
1726 : // write model overlap to file
1727 18220 : if(ovstride_>0 && step%ovstride_==0) {
1728 4 : write_model_overlap(step);
1729 : }
1730 :
1731 : // clear energy and virial
1732 18220 : ene_ = 0.0;
1733 18220 : virial_.zero();
1734 :
1735 : // Gaussian noise
1736 18220 : if(noise_==0) {
1737 7318 : calculate_Gauss();
1738 : }
1739 :
1740 : // Outliers noise
1741 18220 : if(noise_==1) {
1742 5451 : calculate_Outliers();
1743 : }
1744 :
1745 : // Marginal noise
1746 18220 : if(noise_==2) {
1747 5451 : calculate_Marginal();
1748 : }
1749 :
1750 : // get annealing rescale factor
1751 18220 : if(nanneal_>0) {
1752 20 : anneal_ = get_annealing(step);
1753 40 : getPntrToComponent("anneal")->set(anneal_);
1754 : }
1755 :
1756 : // annealing rescale
1757 18220 : ene_ /= anneal_;
1758 :
1759 18220 : std::vector<double> GMMid_der_av_(GMMid_der_.size(), 0.0);
1760 : // in case of ensemble averaging
1761 18220 : if(!no_aver_ && nrep_>1) {
1762 : // if master node, sum der_GMMid derivatives and ene
1763 12 : if(rank_==0) {
1764 10812 : for(unsigned i=0; i<GMMid_der_.size(); ++i) {
1765 10800 : GMMid_der_av_[i] = (weight / norm) * GMMid_der_[i];
1766 : }
1767 12 : multi_sim_comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
1768 12 : multi_sim_comm.Sum(&ene_, 1);
1769 : } else {
1770 : // set der_GMMid derivatives and energy to zero
1771 0 : for(unsigned i=0; i<GMMid_der_av_.size(); ++i) {
1772 0 : GMMid_der_av_[i]=0.0;
1773 : }
1774 0 : ene_ = 0.0;
1775 : }
1776 : // local communication
1777 12 : if(size_>1) {
1778 0 : comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
1779 0 : comm.Sum(&ene_, 1);
1780 : }
1781 : } else {
1782 182080 : for (unsigned i = 0; i < GMMid_der_.size(); ++i) {
1783 163872 : GMMid_der_av_[i] = GMMid_der_[i];
1784 : }
1785 : }
1786 :
1787 : // clean temporary std::vector
1788 10979556 : for(unsigned i=0; i<atom_der_.size(); ++i) {
1789 10961336 : atom_der_[i] = Vector(0,0,0);
1790 : }
1791 :
1792 : // get derivatives of bias with respect to atoms
1793 2133207 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
1794 : // get indexes of data and model component
1795 2114987 : unsigned id = nl_[i] / GMM_m_type_.size();
1796 2114987 : unsigned im = nl_[i] % GMM_m_type_.size();
1797 : // chain rule + replica normalization
1798 2114987 : Vector tot_der = GMMid_der_av_[id] * ovmd_der_[i] * scale_ / anneal_;
1799 2114987 : Vector pos;
1800 2114987 : if(pbc_) {
1801 0 : pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
1802 : } else {
1803 2114987 : pos = getPosition(im);
1804 : }
1805 : // increment derivatives and virial
1806 2114987 : atom_der_[im] += tot_der;
1807 2114987 : virial_ += Tensor(pos, -tot_der);
1808 : }
1809 :
1810 : // communicate local derivatives and virial
1811 18220 : if(size_>1) {
1812 14574 : comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
1813 14574 : comm.Sum(virial_);
1814 : }
1815 :
1816 : // set derivatives, virial, and score
1817 10979556 : for(unsigned i=0; i<atom_der_.size(); ++i) {
1818 21922672 : setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_[i]);
1819 : }
1820 18220 : setBoxDerivatives(getPntrToComponent("scoreb"), virial_);
1821 18220 : getPntrToComponent("scoreb")->set(ene_);
1822 :
1823 18220 : if (do_reweight_) {
1824 : double w_tmp = 0.;
1825 10812 : for (unsigned i = 0; i < ovmd_.size(); ++i) {
1826 10800 : w_tmp += (ovmd_[i] - ovmd_ave_[i]) * GMMid_der_[i];
1827 : }
1828 12 : w_tmp *= scale_ * (weight / norm) / kbt_ * decay_w_;
1829 :
1830 12 : setArgDerivatives(getPntrToComponent("scoreb"), w_tmp);
1831 24 : getPntrToComponent("biasDer")->set(w_tmp);
1832 : }
1833 :
1834 : // This part is needed only for Gaussian and Outliers noise models
1835 18220 : if(noise_!=2) {
1836 :
1837 : // do Montecarlo
1838 12769 : if(dsigma_[0]>0 && step%MCstride_==0 && !getExchangeStep()) {
1839 0 : doMonteCarlo();
1840 : }
1841 :
1842 : // print status
1843 12769 : if(step%statusstride_==0) {
1844 12729 : print_status(step);
1845 : }
1846 :
1847 : // calculate acceptance ratio
1848 12769 : double acc = MCaccept_ / MCtrials_;
1849 :
1850 : // set value
1851 25538 : getPntrToComponent("acc")->set(acc);
1852 :
1853 : }
1854 :
1855 18220 : }
1856 :
1857 7318 : void EMMI::calculate_Gauss() {
1858 : // cycle on all the GMM groups
1859 14636 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1860 : double eneg = 0.0;
1861 : // cycle on all the members of the group
1862 83872 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1863 : // id of the GMM component
1864 76554 : int GMMid = GMM_d_grps_[i][j];
1865 : // calculate deviation
1866 76554 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
1867 : // add to group energy
1868 76554 : eneg += 0.5 * dev * dev;
1869 : // store derivative for later
1870 76554 : GMMid_der_[GMMid] = kbt_ * dev / sigma_[i];
1871 : }
1872 : // add to total energy along with normalizations and prior
1873 7318 : ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
1874 : }
1875 7318 : }
1876 :
1877 5451 : void EMMI::calculate_Outliers() {
1878 : // cycle on all the GMM groups
1879 10902 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1880 : // cycle on all the members of the group
1881 : double eneg = 0.0;
1882 54510 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1883 : // id of the GMM component
1884 49059 : int GMMid = GMM_d_grps_[i][j];
1885 : // calculate deviation
1886 49059 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
1887 : // add to group energy
1888 49059 : eneg += std::log1p( 0.5 * dev * dev );
1889 : // store derivative for later
1890 49059 : GMMid_der_[GMMid] = kbt_ / ( 1.0 + 0.5 * dev * dev ) * dev / sigma_[i];
1891 : }
1892 : // add to total energy along with normalizations and prior
1893 5451 : ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
1894 : }
1895 5451 : }
1896 :
1897 5451 : void EMMI::calculate_Marginal() {
1898 : // cycle on all the GMM groups
1899 10902 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1900 : // cycle on all the members of the group
1901 54510 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1902 : // id of the GMM component
1903 49059 : int GMMid = GMM_d_grps_[i][j];
1904 : // calculate deviation
1905 49059 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
1906 : // calculate errf
1907 49059 : double errf = erf ( dev * inv_sqrt2_ / sigma_min_[i] );
1908 : // add to group energy
1909 49059 : ene_ += -kbt_ * std::log ( 0.5 / dev * errf ) ;
1910 : // store derivative for later
1911 49059 : GMMid_der_[GMMid] = - kbt_/errf*sqrt2_pi_*std::exp(-0.5*dev*dev/sigma_min_[i]/sigma_min_[i])/sigma_min_[i]+kbt_/dev;
1912 : }
1913 : }
1914 5451 : }
1915 :
1916 : }
1917 : }
|