Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2017-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionWithValue.h"
23 : #include "core/ActionAtomistic.h"
24 : #include "core/ActionWithArguments.h"
25 : #include "core/ActionRegister.h"
26 : #include "core/PlumedMain.h"
27 : #include "tools/Matrix.h"
28 : #include "core/GenericMolInfo.h"
29 : #include "core/ActionSet.h"
30 : #include "tools/File.h"
31 : #include "tools/Random.h"
32 :
33 : #include <string>
34 : #include <map>
35 : #include <numeric>
36 : #include <ctime>
37 :
38 : namespace PLMD {
39 : namespace isdb {
40 :
41 : //+PLUMEDOC ISDB_COLVAR EMMI
42 : /*
43 : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
44 :
45 : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in Ref. \cite Hanot113951 .
46 : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
47 :
48 : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
49 : GMM_FILE. We are currently working on a web server to perform
50 : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
51 :
52 : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
53 : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using
54 : the Metainference approach \cite Bonomi:2016ip .
55 :
56 : \warning
57 : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
58 :
59 : \note
60 : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
61 : In this case, the user should add the NO_AVER flag to the input line. To use a replica-based enhanced sampling scheme such as
62 : Parallel-Bias Metadynamics (\ref PBMETAD), one should use the REWEIGHT flag and pass the Metadynamics bias using the ARG keyword.
63 :
64 : \note
65 : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
66 : add the NOPBC flag to the input line
67 :
68 : \par Examples
69 :
70 : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
71 : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
72 :
73 : \plumedfile
74 : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
75 : 0 2.9993805e+01 6.54628 10.37820 -0.92988 2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02 1
76 : 1 2.3468312e+01 6.56095 10.34790 -0.87808 1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02 1
77 : ...
78 : \endplumedfile
79 :
80 : To accelerate the computation of the Bayesian score, one can:
81 : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
82 : - calculate the restraint every other step (or more).
83 :
84 : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
85 : using a GROMACS index file.
86 :
87 : The input file looks as follows:
88 :
89 : \plumedfile
90 : # include pdb info
91 : MOLINFO STRUCTURE=prot.pdb
92 :
93 : # all heavy atoms
94 : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
95 :
96 : # create EMMI score
97 : gmm: EMMI NOPBC SIGMA_MIN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
98 :
99 : # translate into bias - apply every 2 steps
100 : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
101 :
102 : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
103 : \endplumedfile
104 :
105 :
106 : */
107 : //+ENDPLUMEDOC
108 :
109 : class EMMI :
110 : public ActionAtomistic,
111 : public ActionWithArguments,
112 : public ActionWithValue
113 : {
114 : private:
115 :
116 : // temperature in kbt
117 : double kbt_;
118 : // model GMM - atom types
119 : std::vector<unsigned> GMM_m_type_;
120 : // model GMM - list of atom sigmas - one per atom type
121 : std::vector<double> GMM_m_s_;
122 : // model GMM - list of atom weights - one per atom type
123 : std::vector<double> GMM_m_w_;
124 : // data GMM - means, weights, and covariances + beta option
125 : std::vector<Vector> GMM_d_m_;
126 : std::vector<double> GMM_d_w_;
127 : std::vector< VectorGeneric<6> > GMM_d_cov_;
128 : std::vector<int> GMM_d_beta_;
129 : std::vector < std::vector<int> > GMM_d_grps_;
130 : // overlaps
131 : std::vector<double> ovmd_;
132 : std::vector<double> ovdd_;
133 : std::vector<double> ovmd_ave_;
134 : // and derivatives
135 : std::vector<Vector> ovmd_der_;
136 : std::vector<Vector> atom_der_;
137 : std::vector<double> GMMid_der_;
138 : // constants
139 : double cfact_;
140 : double inv_sqrt2_, sqrt2_pi_;
141 : // metainference
142 : unsigned nrep_;
143 : unsigned replica_;
144 : std::vector<double> sigma_;
145 : std::vector<double> sigma_min_;
146 : std::vector<double> sigma_max_;
147 : std::vector<double> dsigma_;
148 : // list of prefactors for overlap between two Gaussians
149 : // pre_fact = 1.0 / (2pi)**1.5 / std::sqrt(det_md) * Wm * Wd
150 : std::vector<double> pre_fact_;
151 : // inverse of the sum of model and data covariances matrices
152 : std::vector< VectorGeneric<6> > inv_cov_md_;
153 : // neighbor list
154 : double nl_cutoff_;
155 : unsigned nl_stride_;
156 : bool first_time_;
157 : bool no_aver_;
158 : std::vector<unsigned> nl_;
159 : // parallel stuff
160 : unsigned size_;
161 : unsigned rank_;
162 : // pbc
163 : bool pbc_;
164 : // Monte Carlo stuff
165 : int MCstride_;
166 : double MCaccept_;
167 : double MCtrials_;
168 : Random random_;
169 : // status stuff
170 : unsigned int statusstride_;
171 : std::string statusfilename_;
172 : OFile statusfile_;
173 : bool first_status_;
174 : // regression
175 : unsigned nregres_;
176 : double scale_;
177 : double scale_min_;
178 : double scale_max_;
179 : double dscale_;
180 : // tabulated exponential
181 : double dpcutoff_;
182 : double dexp_;
183 : unsigned nexp_;
184 : std::vector<double> tab_exp_;
185 : // simulated annealing
186 : unsigned nanneal_;
187 : double kanneal_;
188 : double anneal_;
189 : // prior exponent
190 : double prior_;
191 : // noise type
192 : unsigned noise_;
193 : // total score and virial;
194 : double ene_;
195 : Tensor virial_;
196 : // model overlap file
197 : unsigned int ovstride_;
198 : std::string ovfilename_;
199 :
200 : // Reweighting additions
201 : bool do_reweight_;
202 : bool first_time_w_;
203 : std::vector<double> forces;
204 : std::vector<double> forcesToApply;
205 :
206 : // average weights
207 : double decay_w_;
208 : std::vector<double> average_weights_;
209 :
210 : // write file with model overlap
211 : void write_model_overlap(long int step);
212 : // get median of std::vector
213 : double get_median(std::vector<double> &v);
214 : // annealing
215 : double get_annealing(long int step);
216 : // do regression
217 : double scaleEnergy(double s);
218 : double doRegression();
219 : // read and write status
220 : void read_status();
221 : void print_status(long int step);
222 : // accept or reject
223 : bool doAccept(double oldE, double newE, double kbt);
224 : // do MonteCarlo
225 : void doMonteCarlo();
226 : // read error file
227 : std::vector<double> read_exp_errors(std::string errfile);
228 : // read experimental overlaps
229 : std::vector<double> read_exp_overlaps(std::string ovfile);
230 : // calculate model GMM weights and covariances
231 : std::vector<double> get_GMM_m(std::vector<AtomNumber> &atoms);
232 : // read data GMM file
233 : void get_GMM_d(std::string gmm_file);
234 : // check GMM data
235 : void check_GMM_d(const VectorGeneric<6> &cov, double w);
236 : // auxiliary method
237 : void calculate_useful_stuff(double reso);
238 : // get pref_fact and inv_cov_md
239 : double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
240 : double GMM_w_0, double GMM_w_1,
241 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
242 : // calculate self overlaps between data GMM components - ovdd_
243 : double get_self_overlap(unsigned id);
244 : // calculate overlap between two Gaussians
245 : double get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
246 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
247 : // calculate exponent of overlap for neighbor list update
248 : double get_exp_overlap(const Vector &m_m, const Vector &d_m,
249 : const VectorGeneric<6> &inv_cov_md);
250 : // update the neighbor list
251 : void update_neighbor_list();
252 : // calculate overlap
253 : void calculate_overlap();
254 : // Gaussian noise
255 : void calculate_Gauss();
256 : // Outliers noise
257 : void calculate_Outliers();
258 : // Marginal noise
259 : void calculate_Marginal();
260 :
261 : // See MetainferenceBase
262 : void get_weights(double &weight, double &norm, double &neff);
263 :
264 : public:
265 : static void registerKeywords( Keywords& keys );
266 : explicit EMMI(const ActionOptions&);
267 : // needed for reweighting
268 : void setDerivatives();
269 : void turnOnDerivatives() override;
270 : unsigned getNumberOfDerivatives() override;
271 : void lockRequests() override;
272 : void unlockRequests() override;
273 : void calculateNumericalDerivatives( ActionWithValue* a ) override;
274 : void apply() override;
275 : void setArgDerivatives(Value *v, const double &d);
276 : void setAtomsDerivatives(Value*v, const unsigned i, const Vector&d);
277 : void setBoxDerivatives(Value*v, const Tensor&d);
278 : // active methods:
279 : void prepare() override;
280 : void calculate() override;
281 : };
282 :
283 : inline
284 20 : void EMMI::setDerivatives() {
285 : // Get appropriate number of derivatives
286 : // Derivatives are first for arguments and then for atoms
287 : unsigned nder;
288 20 : if( getNumberOfAtoms()>0 ) {
289 20 : nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
290 : } else {
291 0 : nder = getNumberOfArguments();
292 : }
293 :
294 : // Resize all derivative arrays
295 20 : forces.resize( nder ); forcesToApply.resize( nder );
296 92 : for(int i=0; i<getNumberOfComponents(); ++i) getPntrToComponent(i)->resizeDerivatives(nder);
297 20 : }
298 :
299 : inline
300 20 : void EMMI::turnOnDerivatives() {
301 20 : ActionWithValue::turnOnDerivatives();
302 20 : }
303 :
304 : inline
305 72 : unsigned EMMI::getNumberOfDerivatives() {
306 72 : if( getNumberOfAtoms()>0 ) {
307 72 : return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
308 : }
309 0 : return getNumberOfArguments();
310 : }
311 :
312 : inline
313 30 : void EMMI::lockRequests() {
314 : ActionAtomistic::lockRequests();
315 : ActionWithArguments::lockRequests();
316 30 : }
317 :
318 : inline
319 30 : void EMMI::unlockRequests() {
320 : ActionAtomistic::unlockRequests();
321 : ActionWithArguments::unlockRequests();
322 30 : }
323 :
324 : inline
325 9 : void EMMI::calculateNumericalDerivatives( ActionWithValue* a=NULL ) {
326 9 : if( getNumberOfArguments()>0 ) {
327 0 : ActionWithArguments::calculateNumericalDerivatives( a );
328 : }
329 9 : if( getNumberOfAtoms()>0 ) {
330 9 : Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
331 39 : for(int j=0; j<getNumberOfComponents(); ++j) {
332 30 : for(unsigned i=0; i<getNumberOfArguments(); ++i) if(getPntrToComponent(j)->hasDerivatives()) save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
333 : }
334 9 : calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
335 39 : for(int j=0; j<getNumberOfComponents(); ++j) {
336 30 : for(unsigned i=0; i<getNumberOfArguments(); ++i) if(getPntrToComponent(j)->hasDerivatives()) getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
337 : }
338 : }
339 9 : }
340 :
341 : inline
342 30 : void EMMI::apply() {
343 30 : bool wasforced=false; forcesToApply.assign(forcesToApply.size(),0.0);
344 162 : for(int i=0; i<getNumberOfComponents(); ++i) {
345 132 : if( getPntrToComponent(i)->applyForce( forces ) ) {
346 : wasforced=true;
347 0 : for(unsigned i=0; i<forces.size(); ++i) forcesToApply[i]+=forces[i];
348 : }
349 : }
350 30 : if( wasforced ) {
351 0 : addForcesOnArguments( forcesToApply );
352 0 : if( getNumberOfAtoms()>0 ) setForcesOnAtoms( forcesToApply, getNumberOfArguments() );
353 : }
354 30 : }
355 :
356 : inline
357 : void EMMI::setArgDerivatives(Value *v, const double &d) {
358 : v->addDerivative(0,d);
359 : }
360 :
361 : inline
362 9844626 : void EMMI::setAtomsDerivatives(Value*v, const unsigned i, const Vector&d) {
363 9844626 : const unsigned noa=getNumberOfArguments();
364 9844626 : v->addDerivative(noa+3*i+0,d[0]);
365 9844626 : v->addDerivative(noa+3*i+1,d[1]);
366 9844626 : v->addDerivative(noa+3*i+2,d[2]);
367 9844626 : }
368 :
369 : inline
370 16365 : void EMMI::setBoxDerivatives(Value* v,const Tensor&d) {
371 16365 : const unsigned noa=getNumberOfArguments();
372 : const unsigned nat=getNumberOfAtoms();
373 16365 : v->addDerivative(noa+3*nat+0,d(0,0));
374 16365 : v->addDerivative(noa+3*nat+1,d(0,1));
375 16365 : v->addDerivative(noa+3*nat+2,d(0,2));
376 16365 : v->addDerivative(noa+3*nat+3,d(1,0));
377 16365 : v->addDerivative(noa+3*nat+4,d(1,1));
378 16365 : v->addDerivative(noa+3*nat+5,d(1,2));
379 16365 : v->addDerivative(noa+3*nat+6,d(2,0));
380 16365 : v->addDerivative(noa+3*nat+7,d(2,1));
381 16365 : v->addDerivative(noa+3*nat+8,d(2,2));
382 16365 : }
383 :
384 10459 : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
385 :
386 21 : void EMMI::registerKeywords( Keywords& keys ) {
387 21 : Action::registerKeywords( keys );
388 21 : ActionAtomistic::registerKeywords( keys );
389 21 : ActionWithValue::registerKeywords( keys );
390 21 : ActionWithArguments::registerKeywords( keys );
391 21 : keys.use("ARG");
392 42 : keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
393 42 : keys.addFlag("NOPBC",false,"ignore the periodic boundary conditions when calculating distances");
394 42 : keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
395 42 : keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
396 42 : keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
397 42 : keys.add("compulsory","SIGMA_MIN","minimum uncertainty");
398 42 : keys.add("compulsory","RESOLUTION", "Cryo-EM map resolution");
399 42 : keys.add("compulsory","NOISETYPE","functional form of the noise (GAUSS, OUTLIERS, MARGINAL)");
400 42 : keys.add("optional","SIGMA0","initial value of the uncertainty");
401 42 : keys.add("optional","DSIGMA","MC step for uncertainties");
402 42 : keys.add("optional","MC_STRIDE", "Monte Carlo stride");
403 42 : keys.add("optional","ERR_FILE","file with experimental or GMM fit errors");
404 42 : keys.add("optional","OV_FILE","file with experimental overlaps");
405 42 : keys.add("optional","NORM_DENSITY","integral of the experimental density");
406 42 : keys.add("optional","STATUS_FILE","write a file with all the data useful for restart");
407 42 : keys.add("optional","WRITE_STRIDE","write the status to a file every N steps, this can be used for restart");
408 42 : keys.add("optional","REGRESSION","regression stride");
409 42 : keys.add("optional","REG_SCALE_MIN","regression minimum scale");
410 42 : keys.add("optional","REG_SCALE_MAX","regression maximum scale");
411 42 : keys.add("optional","REG_DSCALE","regression maximum scale MC move");
412 42 : keys.add("optional","SCALE","scale factor");
413 42 : keys.add("optional","ANNEAL", "Length of annealing cycle");
414 42 : keys.add("optional","ANNEAL_FACT", "Annealing temperature factor");
415 42 : keys.add("optional","TEMP","temperature");
416 42 : keys.add("optional","PRIOR", "exponent of uncertainty prior");
417 42 : keys.add("optional","WRITE_OV_STRIDE","write model overlaps every N steps");
418 42 : keys.add("optional","WRITE_OV","write a file with model overlaps");
419 42 : keys.add("optional","AVERAGING", "Averaging window for weights");
420 42 : keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
421 42 : keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the ARG as energy");
422 21 : componentsAreNotOptional(keys);
423 42 : keys.addOutputComponent("scoreb","default","Bayesian score");
424 42 : keys.addOutputComponent("acc", "NOISETYPE","MC acceptance for uncertainty");
425 42 : keys.addOutputComponent("scale", "REGRESSION","scale factor");
426 42 : keys.addOutputComponent("accscale", "REGRESSION","MC acceptance for scale regression");
427 42 : keys.addOutputComponent("enescale", "REGRESSION","MC energy for scale regression");
428 42 : keys.addOutputComponent("anneal","ANNEAL","annealing factor");
429 42 : keys.addOutputComponent("weight", "REWEIGHT", "weights of the weighted average");
430 42 : keys.addOutputComponent("biasDer", "REWEIGHT", "derivatives with respect to the bias");
431 42 : keys.addOutputComponent("sigma", "NOISETYPE", "uncertainty in the forward models and experiment");
432 42 : keys.addOutputComponent("neff", "default", "effective number of replicas");
433 21 : }
434 :
435 20 : EMMI::EMMI(const ActionOptions&ao):
436 : Action(ao),
437 : ActionAtomistic(ao),
438 : ActionWithArguments(ao),
439 : ActionWithValue(ao),
440 20 : inv_sqrt2_(0.707106781186548),
441 20 : sqrt2_pi_(0.797884560802865),
442 20 : first_time_(true), no_aver_(false), pbc_(true),
443 20 : MCstride_(1), MCaccept_(0.), MCtrials_(0.),
444 40 : statusstride_(0), first_status_(true),
445 20 : nregres_(0), scale_(1.),
446 20 : dpcutoff_(15.0), nexp_(1000000), nanneal_(0),
447 40 : kanneal_(0.), anneal_(1.), prior_(1.), ovstride_(0),
448 20 : do_reweight_(false), first_time_w_(true), decay_w_(1.)
449 : {
450 : // periodic boundary conditions
451 20 : bool nopbc=!pbc_;
452 20 : parseFlag("NOPBC",nopbc);
453 20 : pbc_=!nopbc;
454 :
455 : // list of atoms
456 : std::vector<AtomNumber> atoms;
457 40 : parseAtomList("ATOMS", atoms);
458 :
459 : // file with data GMM
460 : std::string GMM_file;
461 40 : parse("GMM_FILE", GMM_file);
462 :
463 : // type of data noise
464 : std::string noise;
465 40 : parse("NOISETYPE",noise);
466 20 : if (noise=="GAUSS") noise_ = 0;
467 12 : else if(noise=="OUTLIERS") noise_ = 1;
468 6 : else if(noise=="MARGINAL") noise_ = 2;
469 0 : else error("Unknown noise type!");
470 :
471 : // minimum value for error
472 : double sigma_min;
473 20 : parse("SIGMA_MIN", sigma_min);
474 20 : if(sigma_min<0) error("SIGMA_MIN should be greater or equal to zero");
475 :
476 : // the following parameters must be specified with noise type 0 and 1
477 : double sigma_ini, dsigma;
478 20 : if(noise_!=2) {
479 : // initial value of the uncertainty
480 14 : parse("SIGMA0", sigma_ini);
481 14 : if(sigma_ini<=0) error("you must specify a positive SIGMA0");
482 : // MC parameters
483 14 : parse("DSIGMA", dsigma);
484 14 : if(dsigma<0) error("you must specify a positive DSIGMA");
485 14 : parse("MC_STRIDE", MCstride_);
486 14 : if(dsigma>0 && MCstride_<=0) error("you must specify a positive MC_STRIDE");
487 : // status file parameters
488 14 : parse("WRITE_STRIDE", statusstride_);
489 14 : if(statusstride_<=0) error("you must specify a positive WRITE_STRIDE");
490 28 : parse("STATUS_FILE", statusfilename_);
491 28 : if(statusfilename_=="") statusfilename_ = "MISTATUS"+getLabel();
492 0 : else statusfilename_ = statusfilename_+getLabel();
493 : }
494 :
495 : // error file
496 : std::string errfile;
497 40 : parse("ERR_FILE", errfile);
498 :
499 : // file with experimental overlaps
500 : std::string ovfile;
501 20 : parse("OV_FILE", ovfile);
502 :
503 : // integral of the experimetal density
504 20 : double norm_d = 0.0;
505 20 : parse("NORM_DENSITY", norm_d);
506 :
507 : // temperature
508 20 : double temp=0.0;
509 20 : parse("TEMP",temp);
510 : // convert temp to kbt
511 20 : if(temp>0.0) kbt_=plumed.getAtoms().getKBoltzmann()*temp;
512 0 : else kbt_=plumed.getAtoms().getKbT();
513 :
514 : // exponent of uncertainty prior
515 20 : parse("PRIOR",prior_);
516 :
517 : // simulated annealing stuff
518 20 : parse("ANNEAL", nanneal_);
519 20 : parse("ANNEAL_FACT", kanneal_);
520 20 : if(nanneal_>0 && kanneal_<=1.0) error("with ANNEAL, ANNEAL_FACT must be greater than 1");
521 :
522 : // regression stride
523 20 : parse("REGRESSION",nregres_);
524 : // other regression parameters
525 20 : if(nregres_>0) {
526 0 : parse("REG_SCALE_MIN",scale_min_);
527 0 : parse("REG_SCALE_MAX",scale_max_);
528 0 : parse("REG_DSCALE",dscale_);
529 : // checks
530 0 : if(scale_max_<=scale_min_) error("with REGRESSION, REG_SCALE_MAX must be greater than REG_SCALE_MIN");
531 0 : if(dscale_<=0.) error("with REGRESSION, REG_DSCALE must be positive");
532 : }
533 :
534 : // scale factor
535 20 : parse("SCALE", scale_);
536 :
537 : // read map resolution
538 : double reso;
539 20 : parse("RESOLUTION", reso);
540 20 : if(reso<=0.) error("RESOLUTION should be strictly positive");
541 :
542 : // neighbor list stuff
543 20 : parse("NL_CUTOFF",nl_cutoff_);
544 20 : if(nl_cutoff_<=0.0) error("NL_CUTOFF should be explicitly specified and positive");
545 20 : parse("NL_STRIDE",nl_stride_);
546 20 : if(nl_stride_<=0) error("NL_STRIDE should be explicitly specified and positive");
547 :
548 : // averaging or not
549 20 : parseFlag("NO_AVER",no_aver_);
550 :
551 : // write overlap file
552 20 : parse("WRITE_OV_STRIDE", ovstride_);
553 20 : parse("WRITE_OV", ovfilename_);
554 20 : if(ovstride_>0 && ovfilename_=="") error("With WRITE_OV_STRIDE you must specify WRITE_OV");
555 :
556 : // set parallel stuff
557 20 : size_=comm.Get_size();
558 20 : rank_=comm.Get_rank();
559 :
560 : // get number of replicas
561 20 : if(rank_==0) {
562 14 : if(no_aver_) {
563 12 : nrep_ = 1;
564 : } else {
565 2 : nrep_ = multi_sim_comm.Get_size();
566 : }
567 14 : replica_ = multi_sim_comm.Get_rank();
568 : } else {
569 6 : nrep_ = 0;
570 6 : replica_ = 0;
571 : }
572 20 : comm.Sum(&nrep_,1);
573 20 : comm.Sum(&replica_,1);
574 :
575 : // Reweighting flag
576 20 : parseFlag("REWEIGHT", do_reweight_);
577 20 : if(do_reweight_&&getNumberOfArguments()!=1) error("To REWEIGHT one must provide one single bias as an argument");
578 20 : if(do_reweight_&&no_aver_) error("REWEIGHT cannot be used with NO_AVER");
579 20 : if(do_reweight_&&nrep_<2) error("REWEIGHT can only be used in parallel with 2 or more replicas");
580 20 : if(!getRestart()) average_weights_.resize(nrep_, 1./static_cast<double>(nrep_));
581 0 : else average_weights_.resize(nrep_, 0.);
582 :
583 20 : unsigned averaging=0;
584 20 : parse("AVERAGING", averaging);
585 20 : if(averaging>0) {
586 2 : decay_w_ = 1./static_cast<double> (averaging);
587 : }
588 :
589 20 : checkRead();
590 :
591 20 : log.printf(" atoms involved : ");
592 10876 : for(unsigned i=0; i<atoms.size(); ++i) log.printf("%d ",atoms[i].serial());
593 20 : log.printf("\n");
594 20 : log.printf(" GMM data file : %s\n", GMM_file.c_str());
595 20 : if(no_aver_) log.printf(" without ensemble averaging\n");
596 20 : log.printf(" type of data noise : %s\n", noise.c_str());
597 20 : log.printf(" neighbor list cutoff : %lf\n", nl_cutoff_);
598 20 : log.printf(" neighbor list stride : %u\n", nl_stride_);
599 20 : log.printf(" minimum uncertainty : %f\n",sigma_min);
600 20 : log.printf(" scale factor : %lf\n",scale_);
601 20 : if(nregres_>0) {
602 0 : log.printf(" regression stride : %u\n", nregres_);
603 0 : log.printf(" regression minimum scale : %lf\n", scale_min_);
604 0 : log.printf(" regression maximum scale : %lf\n", scale_max_);
605 0 : log.printf(" regression maximum scale MC move : %lf\n", dscale_);
606 : }
607 20 : if(noise_!=2) {
608 14 : log.printf(" initial value of the uncertainty : %f\n",sigma_ini);
609 14 : log.printf(" max MC move in uncertainty : %f\n",dsigma);
610 14 : log.printf(" MC stride : %u\n", MCstride_);
611 14 : log.printf(" reading/writing to status file : %s\n",statusfilename_.c_str());
612 14 : log.printf(" with stride : %u\n",statusstride_);
613 : }
614 20 : if(errfile.size()>0) log.printf(" reading experimental errors from file : %s\n", errfile.c_str());
615 20 : if(ovfile.size()>0) log.printf(" reading experimental overlaps from file : %s\n", ovfile.c_str());
616 20 : log.printf(" temperature of the system in energy unit : %f\n",kbt_);
617 20 : log.printf(" prior exponent : %f\n",prior_);
618 20 : log.printf(" number of replicas for averaging: %u\n",nrep_);
619 20 : log.printf(" id of the replica : %u\n",replica_);
620 20 : if(nanneal_>0) {
621 0 : log.printf(" length of annealing cycle : %u\n",nanneal_);
622 0 : log.printf(" annealing factor : %f\n",kanneal_);
623 : }
624 20 : if(ovstride_>0) {
625 0 : log.printf(" stride for writing model overlaps : %u\n",ovstride_);
626 0 : log.printf(" file for writing model overlaps : %s\n", ovfilename_.c_str());
627 : }
628 :
629 : // set constant quantity before calculating stuff
630 20 : cfact_ = 1.0/pow( 2.0*pi, 1.5 );
631 :
632 : // calculate model GMM constant parameters
633 20 : std::vector<double> GMM_m_w = get_GMM_m(atoms);
634 :
635 : // read data GMM parameters
636 20 : get_GMM_d(GMM_file);
637 20 : log.printf(" number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
638 :
639 : // normalize atom weight map
640 40 : if(norm_d <= 0.0) norm_d = accumulate(GMM_d_w_.begin(), GMM_d_w_.end(), 0.0);
641 : double norm_m = accumulate(GMM_m_w.begin(), GMM_m_w.end(), 0.0);
642 : // renormalization
643 100 : for(unsigned i=0; i<GMM_m_w_.size(); ++i) GMM_m_w_[i] *= norm_d / norm_m;
644 :
645 : // read experimental errors
646 : std::vector<double> exp_err;
647 20 : if(errfile.size()>0) exp_err = read_exp_errors(errfile);
648 :
649 : // get self overlaps between data GMM components
650 20 : if(ovfile.size()>0) {
651 0 : ovdd_ = read_exp_overlaps(ovfile);
652 : } else {
653 1982 : for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
654 1962 : double ov = get_self_overlap(i);
655 1962 : ovdd_.push_back(ov);
656 : }
657 : }
658 :
659 20 : log.printf(" number of GMM groups : %u\n", static_cast<unsigned>(GMM_d_grps_.size()));
660 : // cycle on GMM groups
661 40 : for(unsigned Gid=0; Gid<GMM_d_grps_.size(); ++Gid) {
662 20 : log.printf(" group %d\n", Gid);
663 : // calculate median overlap and experimental error
664 : std::vector<double> ovdd;
665 : std::vector<double> err;
666 : // cycle on the group members
667 1982 : for(unsigned i=0; i<GMM_d_grps_[Gid].size(); ++i) {
668 : // GMM id
669 1962 : int GMMid = GMM_d_grps_[Gid][i];
670 : // add to experimental error
671 1962 : if(errfile.size()>0) err.push_back(exp_err[GMMid]);
672 1962 : else err.push_back(0.);
673 : // add to GMM overlap
674 1962 : ovdd.push_back(ovdd_[GMMid]);
675 : }
676 : // calculate median quantities
677 20 : double ovdd_m = get_median(ovdd);
678 20 : double err_m = get_median(err);
679 : // print out statistics
680 20 : log.printf(" # of members : %zu\n", GMM_d_grps_[Gid].size());
681 20 : log.printf(" median overlap : %lf\n", ovdd_m);
682 20 : log.printf(" median error : %lf\n", err_m);
683 : // add minimum value of sigma for this group of GMMs
684 20 : sigma_min_.push_back(std::sqrt(err_m*err_m+sigma_min*ovdd_m*sigma_min*ovdd_m));
685 : // these are only needed with Gaussian and Outliers noise models
686 20 : if(noise_!=2) {
687 : // set dsigma
688 14 : dsigma_.push_back(dsigma * ovdd_m);
689 : // set sigma max
690 14 : sigma_max_.push_back(10.0*ovdd_m + sigma_min_[Gid] + dsigma_[Gid]);
691 : // initialize sigma
692 28 : sigma_.push_back(std::max(sigma_min_[Gid],std::min(sigma_ini*ovdd_m,sigma_max_[Gid])));
693 : }
694 : }
695 :
696 : // read status file if restarting
697 20 : if(getRestart() && noise_!=2) read_status();
698 :
699 : // calculate auxiliary stuff
700 20 : calculate_useful_stuff(reso);
701 :
702 : // prepare data and derivative std::vectors
703 20 : ovmd_.resize(ovdd_.size());
704 20 : ovmd_ave_.resize(ovdd_.size());
705 20 : atom_der_.resize(GMM_m_type_.size());
706 20 : GMMid_der_.resize(ovdd_.size());
707 :
708 : // clear things that are no longer needed
709 : GMM_d_cov_.clear();
710 :
711 : // add components
712 40 : addComponentWithDerivatives("scoreb"); componentIsNotPeriodic("scoreb");
713 :
714 48 : if(noise_!=2) {addComponent("acc"); componentIsNotPeriodic("acc");}
715 :
716 20 : if(nregres_>0) {
717 0 : addComponent("scale"); componentIsNotPeriodic("scale");
718 0 : addComponent("accscale"); componentIsNotPeriodic("accscale");
719 0 : addComponent("enescale"); componentIsNotPeriodic("enescale");
720 : }
721 :
722 20 : if(nanneal_>0) {addComponent("anneal"); componentIsNotPeriodic("anneal");}
723 :
724 20 : if(do_reweight_) {
725 2 : addComponent("biasDer");
726 2 : componentIsNotPeriodic("biasDer");
727 2 : addComponent("weight");
728 4 : componentIsNotPeriodic("weight");
729 : }
730 :
731 20 : addComponent("neff");
732 20 : componentIsNotPeriodic("neff");
733 :
734 34 : for(unsigned i=0; i<sigma_.size(); ++i) {
735 14 : std::string num; Tools::convert(i, num);
736 28 : addComponent("sigma-"+num); componentIsNotPeriodic("sigma-"+num);
737 28 : getPntrToComponent("sigma-"+num)->set(sigma_[i]);
738 : }
739 :
740 : // initialize random seed
741 : unsigned iseed;
742 20 : if(rank_==0) iseed = time(NULL)+replica_;
743 6 : else iseed = 0;
744 20 : comm.Sum(&iseed, 1);
745 20 : random_.setSeed(-iseed);
746 :
747 : // request the atoms
748 20 : requestAtoms(atoms);
749 20 : setDerivatives();
750 :
751 : // print bibliography
752 40 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
753 40 : log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
754 40 : log<<plumed.cite("Bonomi, Pellarin, Vendruscolo, Biophys. J. 114, 1604 (2018)");
755 24 : if(do_reweight_) log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
756 24 : if(!no_aver_ && nrep_>1)log<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
757 20 : log<<"\n";
758 20 : }
759 :
760 0 : void EMMI::write_model_overlap(long int step)
761 : {
762 0 : OFile ovfile;
763 0 : ovfile.link(*this);
764 0 : std::string num; Tools::convert(step,num);
765 0 : std::string name = ovfilename_+"-"+num;
766 0 : ovfile.open(name);
767 : ovfile.setHeavyFlush();
768 0 : ovfile.fmtField("%10.7e ");
769 : // write overlaps
770 0 : for(int i=0; i<ovmd_.size(); ++i) {
771 0 : ovfile.printField("Model", ovmd_[i]);
772 0 : ovfile.printField("ModelScaled", scale_ * ovmd_[i]);
773 0 : ovfile.printField("Data", ovdd_[i]);
774 0 : ovfile.printField();
775 : }
776 0 : ovfile.close();
777 0 : }
778 :
779 40 : double EMMI::get_median(std::vector<double> &v)
780 : {
781 : // dimension of std::vector
782 40 : unsigned size = v.size();
783 : // in case of only one entry
784 40 : if (size==1) {
785 0 : return v[0];
786 : } else {
787 : // reorder std::vector
788 40 : sort(v.begin(), v.end());
789 : // odd or even?
790 40 : if (size%2==0) {
791 4 : return (v[size/2-1]+v[size/2])/2.0;
792 : } else {
793 36 : return v[size/2];
794 : }
795 : }
796 : }
797 :
798 0 : void EMMI::read_status()
799 : {
800 : double MDtime;
801 : // open file
802 0 : auto ifile = Tools::make_unique<IFile>();
803 0 : ifile->link(*this);
804 0 : if(ifile->FileExist(statusfilename_)) {
805 0 : ifile->open(statusfilename_);
806 0 : while(ifile->scanField("MD_time", MDtime)) {
807 0 : for(unsigned i=0; i<sigma_.size(); ++i) {
808 : // convert i to std::string
809 0 : std::string num; Tools::convert(i,num);
810 : // read entries
811 0 : ifile->scanField("s"+num, sigma_[i]);
812 : }
813 : // new line
814 0 : ifile->scanField();
815 : }
816 0 : ifile->close();
817 : } else {
818 0 : error("Cannot find status file "+statusfilename_+"\n");
819 : }
820 0 : }
821 :
822 10904 : void EMMI::print_status(long int step)
823 : {
824 : // if first time open the file
825 10904 : if(first_status_) {
826 14 : first_status_ = false;
827 14 : statusfile_.link(*this);
828 14 : statusfile_.open(statusfilename_);
829 : statusfile_.setHeavyFlush();
830 28 : statusfile_.fmtField("%6.3e ");
831 : }
832 : // write fields
833 10904 : double MDtime = static_cast<double>(step)*getTimeStep();
834 10904 : statusfile_.printField("MD_time", MDtime);
835 21808 : for(unsigned i=0; i<sigma_.size(); ++i) {
836 : // convert i to std::string
837 10904 : std::string num; Tools::convert(i,num);
838 : // print entry
839 21808 : statusfile_.printField("s"+num, sigma_[i]);
840 : }
841 10904 : statusfile_.printField();
842 10904 : }
843 :
844 0 : bool EMMI::doAccept(double oldE, double newE, double kbt) {
845 : bool accept = false;
846 : // calculate delta energy
847 0 : double delta = ( newE - oldE ) / kbt;
848 : // if delta is negative always accept move
849 0 : if( delta < 0.0 ) {
850 : accept = true;
851 : } else {
852 : // otherwise extract random number
853 0 : double s = random_.RandU01();
854 0 : if( s < std::exp(-delta) ) { accept = true; }
855 : }
856 0 : return accept;
857 : }
858 :
859 0 : void EMMI::doMonteCarlo()
860 : {
861 : // extract random GMM group
862 0 : unsigned nGMM = static_cast<unsigned>(std::floor(random_.RandU01()*static_cast<double>(GMM_d_grps_.size())));
863 0 : if(nGMM==GMM_d_grps_.size()) nGMM -= 1;
864 :
865 : // generate random move
866 0 : double shift = dsigma_[nGMM] * ( 2.0 * random_.RandU01() - 1.0 );
867 : // new sigma
868 0 : double new_s = sigma_[nGMM] + shift;
869 : // check boundaries
870 0 : if(new_s > sigma_max_[nGMM]) {new_s = 2.0 * sigma_max_[nGMM] - new_s;}
871 0 : if(new_s < sigma_min_[nGMM]) {new_s = 2.0 * sigma_min_[nGMM] - new_s;}
872 : // old s2
873 0 : double old_inv_s2 = 1.0 / sigma_[nGMM] / sigma_[nGMM];
874 : // new s2
875 0 : double new_inv_s2 = 1.0 / new_s / new_s;
876 :
877 : // cycle on GMM group and calculate old and new energy
878 : double old_ene = 0.0;
879 : double new_ene = 0.0;
880 0 : double ng = static_cast<double>(GMM_d_grps_[nGMM].size());
881 :
882 : // in case of Gaussian noise
883 0 : if(noise_==0) {
884 : double chi2 = 0.0;
885 0 : for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
886 : // id GMM component
887 0 : int GMMid = GMM_d_grps_[nGMM][i];
888 : // deviation
889 0 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
890 : // add to chi2
891 0 : chi2 += dev * dev;
892 : }
893 : // final energy calculation: add normalization and prior
894 0 : old_ene = 0.5 * kbt_ * ( chi2 * old_inv_s2 - (ng+prior_) * std::log(old_inv_s2) );
895 0 : new_ene = 0.5 * kbt_ * ( chi2 * new_inv_s2 - (ng+prior_) * std::log(new_inv_s2) );
896 : }
897 :
898 : // in case of Outliers noise
899 0 : if(noise_==1) {
900 0 : for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
901 : // id GMM component
902 0 : int GMMid = GMM_d_grps_[nGMM][i];
903 : // calculate deviation
904 0 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
905 : // add to energies
906 0 : old_ene += std::log( 1.0 + 0.5 * dev * dev * old_inv_s2);
907 0 : new_ene += std::log( 1.0 + 0.5 * dev * dev * new_inv_s2);
908 : }
909 : // final energy calculation: add normalization and prior
910 0 : old_ene = kbt_ * ( old_ene + (ng+prior_) * std::log(sigma_[nGMM]) );
911 0 : new_ene = kbt_ * ( new_ene + (ng+prior_) * std::log(new_s) );
912 : }
913 :
914 : // increment number of trials
915 0 : MCtrials_ += 1.0;
916 :
917 : // accept or reject
918 0 : bool accept = doAccept(old_ene/anneal_, new_ene/anneal_, kbt_);
919 0 : if(accept) {
920 0 : sigma_[nGMM] = new_s;
921 0 : MCaccept_ += 1.0;
922 : }
923 : // local communication
924 0 : if(rank_!=0) {
925 0 : for(unsigned i=0; i<sigma_.size(); ++i) sigma_[i] = 0.0;
926 0 : MCaccept_ = 0.0;
927 : }
928 0 : if(size_>1) {
929 0 : comm.Sum(&sigma_[0], sigma_.size());
930 0 : comm.Sum(&MCaccept_, 1);
931 : }
932 :
933 : // update sigma output
934 0 : std::string num; Tools::convert(nGMM, num);
935 0 : getPntrToComponent("sigma-"+num)->set(sigma_[nGMM]);
936 0 : }
937 :
938 0 : std::vector<double> EMMI::read_exp_errors(std::string errfile)
939 : {
940 : int nexp, idcomp;
941 : double err;
942 : std::vector<double> exp_err;
943 : // open file
944 0 : IFile *ifile = new IFile();
945 0 : if(ifile->FileExist(errfile)) {
946 0 : ifile->open(errfile);
947 : // scan for number of experimental errors
948 0 : ifile->scanField("Nexp", nexp);
949 : // cycle on GMM components
950 0 : while(ifile->scanField("Id",idcomp)) {
951 : // total experimental error
952 0 : double err_tot = 0.0;
953 : // cycle on number of experimental overlaps
954 0 : for(unsigned i=0; i<nexp; ++i) {
955 0 : std::string ss; Tools::convert(i,ss);
956 0 : ifile->scanField("Err"+ss, err);
957 : // add to total error
958 0 : err_tot += err*err;
959 : }
960 : // new line
961 0 : ifile->scanField();
962 : // calculate RMSE
963 0 : err_tot = std::sqrt(err_tot/static_cast<double>(nexp));
964 : // add to global
965 0 : exp_err.push_back(err_tot);
966 : }
967 0 : ifile->close();
968 : } else {
969 0 : error("Cannot find ERR_FILE "+errfile+"\n");
970 : }
971 0 : return exp_err;
972 : }
973 :
974 0 : std::vector<double> EMMI::read_exp_overlaps(std::string ovfile)
975 : {
976 : int idcomp;
977 : double ov;
978 : std::vector<double> ovdd;
979 : // open file
980 0 : IFile *ifile = new IFile();
981 0 : if(ifile->FileExist(ovfile)) {
982 0 : ifile->open(ovfile);
983 : // cycle on GMM components
984 0 : while(ifile->scanField("Id",idcomp)) {
985 : // read experimental overlap
986 0 : ifile->scanField("Overlap", ov);
987 : // add to ovdd
988 0 : ovdd.push_back(ov);
989 : // new line
990 0 : ifile->scanField();
991 : }
992 0 : ifile->close();
993 : } else {
994 0 : error("Cannot find OV_FILE "+ovfile+"\n");
995 : }
996 0 : return ovdd;
997 : }
998 :
999 20 : std::vector<double> EMMI::get_GMM_m(std::vector<AtomNumber> &atoms)
1000 : {
1001 : // list of weights - one per atom
1002 : std::vector<double> GMM_m_w;
1003 :
1004 20 : auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
1005 : // map of atom types to A and B coefficients of scattering factor
1006 : // f(s) = A * exp(-B*s**2)
1007 : // B is in Angstrom squared
1008 : // map between an atom type and an index
1009 : std::map<std::string, unsigned> type_map;
1010 20 : type_map["C"]=0;
1011 20 : type_map["O"]=1;
1012 20 : type_map["N"]=2;
1013 20 : type_map["S"]=3;
1014 : // fill in sigma std::vector
1015 20 : GMM_m_s_.push_back(0.01*15.146); // type 0
1016 20 : GMM_m_s_.push_back(0.01*8.59722); // type 1
1017 20 : GMM_m_s_.push_back(0.01*11.1116); // type 2
1018 20 : GMM_m_s_.push_back(0.01*15.8952); // type 3
1019 : // fill in weight std::vector
1020 20 : GMM_m_w_.push_back(2.49982); // type 0
1021 20 : GMM_m_w_.push_back(1.97692); // type 1
1022 20 : GMM_m_w_.push_back(2.20402); // type 2
1023 20 : GMM_m_w_.push_back(5.14099); // type 3
1024 :
1025 : // check if MOLINFO line is present
1026 20 : if( moldat ) {
1027 20 : log<<" MOLINFO DATA found with label " <<moldat->getLabel()<<", using proper atom names\n";
1028 10876 : for(unsigned i=0; i<atoms.size(); ++i) {
1029 : // get atom name
1030 10856 : std::string name = moldat->getAtomName(atoms[i]);
1031 : char type;
1032 : // get atom type
1033 10856 : char first = name.at(0);
1034 : // GOLDEN RULE: type is first letter, if not a number
1035 10856 : if (!isdigit(first)) {
1036 : type = first;
1037 : // otherwise is the second
1038 : } else {
1039 0 : type = name.at(1);
1040 : }
1041 : // check if key in map
1042 10856 : std::string type_s = std::string(1,type);
1043 10856 : if(type_map.find(type_s) != type_map.end()) {
1044 : // save atom type
1045 10856 : GMM_m_type_.push_back(type_map[type_s]);
1046 : // this will be normalized in the final density
1047 10856 : GMM_m_w.push_back(GMM_m_w_[type_map[type_s]]);
1048 : } else {
1049 0 : error("Wrong atom type "+type_s+" from atom name "+name+"\n");
1050 : }
1051 : }
1052 : } else {
1053 0 : error("MOLINFO DATA not found\n");
1054 : }
1055 20 : return GMM_m_w;
1056 : }
1057 :
1058 1962 : void EMMI::check_GMM_d(const VectorGeneric<6> &cov, double w)
1059 : {
1060 :
1061 : // check if positive defined, by calculating the 3 leading principal minors
1062 1962 : double pm1 = cov[0];
1063 1962 : double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
1064 1962 : double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
1065 : // apply Sylvester’s criterion
1066 1962 : if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0)
1067 0 : error("check data GMM: covariance matrix is not positive defined");
1068 :
1069 : // check if weight is positive
1070 1962 : if(w<=0) error("check data GMM: weight must be positive");
1071 1962 : }
1072 :
1073 : // read GMM data file in PLUMED format:
1074 20 : void EMMI::get_GMM_d(std::string GMM_file)
1075 : {
1076 20 : VectorGeneric<6> cov;
1077 :
1078 : // open file
1079 20 : auto ifile=Tools::make_unique<IFile>();
1080 20 : if(ifile->FileExist(GMM_file)) {
1081 20 : ifile->open(GMM_file);
1082 : int idcomp;
1083 3964 : while(ifile->scanField("Id",idcomp)) {
1084 : int beta;
1085 : double w, m0, m1, m2;
1086 3924 : ifile->scanField("Weight",w);
1087 3924 : ifile->scanField("Mean_0",m0);
1088 3924 : ifile->scanField("Mean_1",m1);
1089 3924 : ifile->scanField("Mean_2",m2);
1090 3924 : ifile->scanField("Cov_00",cov[0]);
1091 3924 : ifile->scanField("Cov_01",cov[1]);
1092 3924 : ifile->scanField("Cov_02",cov[2]);
1093 3924 : ifile->scanField("Cov_11",cov[3]);
1094 3924 : ifile->scanField("Cov_12",cov[4]);
1095 3924 : ifile->scanField("Cov_22",cov[5]);
1096 1962 : ifile->scanField("Beta",beta);
1097 : // check input
1098 1962 : check_GMM_d(cov, w);
1099 : // check beta
1100 1962 : if(beta<0) error("Beta must be positive!");
1101 : // center of the Gaussian
1102 1962 : GMM_d_m_.push_back(Vector(m0,m1,m2));
1103 : // covariance matrix
1104 1962 : GMM_d_cov_.push_back(cov);
1105 : // weight
1106 1962 : GMM_d_w_.push_back(w);
1107 : // beta
1108 1962 : GMM_d_beta_.push_back(beta);
1109 : // new line
1110 1962 : ifile->scanField();
1111 : }
1112 : } else {
1113 0 : error("Cannot find GMM_FILE "+GMM_file+"\n");
1114 : }
1115 : // now create a set from beta (unique set of values)
1116 20 : std::set<int> bu(GMM_d_beta_.begin(), GMM_d_beta_.end());
1117 : // now prepare the group std::vector
1118 20 : GMM_d_grps_.resize(bu.size());
1119 : // and fill it in
1120 1982 : for(unsigned i=0; i<GMM_d_beta_.size(); ++i) {
1121 1962 : if(GMM_d_beta_[i]>=GMM_d_grps_.size()) error("Check Beta values");
1122 1962 : GMM_d_grps_[GMM_d_beta_[i]].push_back(i);
1123 : }
1124 20 : }
1125 :
1126 20 : void EMMI::calculate_useful_stuff(double reso)
1127 : {
1128 : // We use the following definition for resolution:
1129 : // the Fourier transform of the density distribution in real space
1130 : // f(s) falls to 1/e of its maximum value at wavenumber 1/resolution
1131 : // i.e. from f(s) = A * exp(-B*s**2) -> Res = std::sqrt(B).
1132 : // average value of B
1133 : double Bave = 0.0;
1134 10876 : for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
1135 10856 : Bave += GMM_m_s_[GMM_m_type_[i]];
1136 : }
1137 20 : Bave /= static_cast<double>(GMM_m_type_.size());
1138 : // calculate blur factor
1139 : double blur = 0.0;
1140 20 : if(reso*reso>Bave) blur = reso*reso-Bave;
1141 36 : else warning("PLUMED should not be used with maps at resolution better than 0.3 nm");
1142 : // add blur to B
1143 100 : for(unsigned i=0; i<GMM_m_s_.size(); ++i) GMM_m_s_[i] += blur;
1144 : // calculate average resolution
1145 : double ave_res = 0.0;
1146 10876 : for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
1147 10856 : ave_res += std::sqrt(GMM_m_s_[GMM_m_type_[i]]);
1148 : }
1149 20 : ave_res = ave_res / static_cast<double>(GMM_m_type_.size());
1150 20 : log.printf(" experimental map resolution : %3.2f\n", reso);
1151 20 : log.printf(" predicted map resolution : %3.2f\n", ave_res);
1152 20 : log.printf(" blur factor : %f\n", blur);
1153 : // now calculate useful stuff
1154 20 : VectorGeneric<6> cov, sum, inv_sum;
1155 : // cycle on all atoms types (4 for the moment)
1156 100 : for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
1157 : // the Gaussian in density (real) space is the FT of scattering factor
1158 : // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
1159 80 : double s = std::sqrt ( 0.5 * GMM_m_s_[i] ) / pi;
1160 : // covariance matrix for spherical Gaussian
1161 80 : cov[0]=s*s; cov[1]=0.0; cov[2]=0.0;
1162 80 : cov[3]=s*s; cov[4]=0.0;
1163 80 : cov[5]=s*s;
1164 : // cycle on all data GMM
1165 7928 : for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
1166 : // we need the sum of the covariance matrices
1167 54936 : for(unsigned k=0; k<6; ++k) sum[k] = cov[k] + GMM_d_cov_[j][k];
1168 : // and to calculate its determinant
1169 7848 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
1170 7848 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
1171 7848 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
1172 : // calculate prefactor - model weights are already normalized
1173 7848 : double pre_fact = cfact_ / std::sqrt(det) * GMM_d_w_[j] * GMM_m_w_[i];
1174 : // and its inverse
1175 7848 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
1176 7848 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
1177 7848 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
1178 7848 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
1179 7848 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
1180 7848 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
1181 : // now we store the prefactor
1182 7848 : pre_fact_.push_back(pre_fact);
1183 : // and the inverse of the sum
1184 7848 : inv_cov_md_.push_back(inv_sum);
1185 : }
1186 : }
1187 : // tabulate exponential
1188 20 : dexp_ = dpcutoff_ / static_cast<double> (nexp_-1);
1189 20000020 : for(unsigned i=0; i<nexp_; ++i) {
1190 20000000 : tab_exp_.push_back(std::exp(-static_cast<double>(i) * dexp_));
1191 : }
1192 20 : }
1193 :
1194 : // get prefactors
1195 1621458 : double EMMI::get_prefactor_inverse
1196 : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
1197 : double GMM_w_0, double GMM_w_1,
1198 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum)
1199 : {
1200 : // we need the sum of the covariance matrices
1201 11350206 : for(unsigned k=0; k<6; ++k) sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
1202 :
1203 : // and to calculate its determinant
1204 1621458 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
1205 1621458 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
1206 1621458 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
1207 :
1208 : // the prefactor is
1209 1621458 : double pre_fact = cfact_ / std::sqrt(det) * GMM_w_0 * GMM_w_1;
1210 :
1211 : // and its inverse
1212 1621458 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
1213 1621458 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
1214 1621458 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
1215 1621458 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
1216 1621458 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
1217 1621458 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
1218 :
1219 : // return pre-factor
1220 1621458 : return pre_fact;
1221 : }
1222 :
1223 1962 : double EMMI::get_self_overlap(unsigned id)
1224 : {
1225 : double ov_tot = 0.0;
1226 1962 : VectorGeneric<6> sum, inv_sum;
1227 1962 : Vector ov_der;
1228 : // start loop
1229 1623420 : for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
1230 : // call auxiliary method
1231 1621458 : double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
1232 1621458 : GMM_d_w_[id], GMM_d_w_[i], sum, inv_sum);
1233 : // add overlap to ov_tot
1234 1621458 : ov_tot += get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
1235 : }
1236 : // and return it
1237 1962 : return ov_tot;
1238 : }
1239 :
1240 : // get overlap and derivatives
1241 3732816 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
1242 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der)
1243 : {
1244 3732816 : Vector md;
1245 : // calculate std::vector difference m_m-d_m with/without pbc
1246 3732816 : if(pbc_) md = pbcDistance(d_m, m_m);
1247 3732816 : else md = delta(d_m, m_m);
1248 : // calculate product of transpose of md and inv_cov_md
1249 3732816 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
1250 3732816 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
1251 3732816 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
1252 : // calculate product of prod and md
1253 3732816 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
1254 : // final calculation
1255 3732816 : ov = pre_fact * std::exp(-0.5*ov);
1256 : // derivatives
1257 3732816 : ov_der = ov * Vector(p_x, p_y, p_z);
1258 3732816 : return ov;
1259 : }
1260 :
1261 : // get the exponent of the overlap
1262 59085036 : double EMMI::get_exp_overlap(const Vector &m_m, const Vector &d_m,
1263 : const VectorGeneric<6> &inv_cov_md)
1264 : {
1265 59085036 : Vector md;
1266 : // calculate std::vector difference m_m-d_m with/without pbc
1267 59085036 : if(pbc_) md = pbcDistance(d_m, m_m);
1268 59085036 : else md = delta(d_m, m_m);
1269 : // calculate product of transpose of md and inv_cov_md
1270 59085036 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
1271 59085036 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
1272 59085036 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
1273 : // calculate product of prod and md
1274 59085036 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
1275 59085036 : return ov;
1276 : }
1277 :
1278 16355 : void EMMI::update_neighbor_list()
1279 : {
1280 : // dimension of GMM and atom std::vectors
1281 16355 : unsigned GMM_d_size = GMM_d_m_.size();
1282 16355 : unsigned GMM_m_size = GMM_m_type_.size();
1283 : // local neighbor list
1284 : std::vector < unsigned > nl_l;
1285 : // clear old neighbor list
1286 16355 : nl_.clear();
1287 :
1288 : // cycle on GMM components - in parallel
1289 116273 : for(unsigned id=rank_; id<GMM_d_size; id+=size_) {
1290 : // overlap lists and map
1291 : std::vector<double> ov_l;
1292 : std::map<double, unsigned> ov_m;
1293 : // total overlap with id
1294 : double ov_tot = 0.0;
1295 : // cycle on all atoms
1296 59184954 : for(unsigned im=0; im<GMM_m_size; ++im) {
1297 : // get index in auxiliary lists
1298 59085036 : unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
1299 : // calculate exponent of overlap
1300 59085036 : double expov = get_exp_overlap(GMM_d_m_[id], getPosition(im), inv_cov_md_[kaux]);
1301 : // get index of 0.5*expov in tabulated exponential
1302 59085036 : unsigned itab = static_cast<unsigned> (round( 0.5*expov/dexp_ ));
1303 : // check boundaries and skip atom in case
1304 59085036 : if(itab >= tab_exp_.size()) continue;
1305 : // in case calculate overlap
1306 4133388 : double ov = pre_fact_[kaux] * tab_exp_[itab];
1307 : // add to list
1308 4133388 : ov_l.push_back(ov);
1309 : // and map to retrieve atom index
1310 4133388 : ov_m[ov] = im;
1311 : // increase ov_tot
1312 4133388 : ov_tot += ov;
1313 : }
1314 : // check if zero size -> ov_tot = 0
1315 99918 : if(ov_l.size()==0) continue;
1316 : // define cutoff
1317 99842 : double ov_cut = ov_tot * nl_cutoff_;
1318 : // sort ov_l in ascending order
1319 99842 : std::sort(ov_l.begin(), ov_l.end());
1320 : // integrate ov_l
1321 : double res = 0.0;
1322 2146102 : for(unsigned i=0; i<ov_l.size(); ++i) {
1323 2146102 : res += ov_l[i];
1324 : // if exceeding the cutoff for overlap, stop
1325 2146102 : if(res >= ov_cut) break;
1326 : else ov_m.erase(ov_l[i]);
1327 : }
1328 : // now add atoms to neighborlist
1329 2186970 : for(std::map<double, unsigned>::iterator it=ov_m.begin(); it!=ov_m.end(); ++it)
1330 2087128 : nl_l.push_back(id*GMM_m_size+it->second);
1331 : // end cycle on GMM components in parallel
1332 : }
1333 : // find total dimension of neighborlist
1334 16355 : std::vector <int> recvcounts(size_, 0);
1335 16355 : recvcounts[rank_] = nl_l.size();
1336 16355 : comm.Sum(&recvcounts[0], size_);
1337 : int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
1338 : // resize neighbor stuff
1339 16355 : nl_.resize(tot_size);
1340 : // calculate std::vector of displacement
1341 16355 : std::vector<int> disp(size_);
1342 16355 : disp[0] = 0;
1343 : int rank_size = 0;
1344 27257 : for(unsigned i=0; i<size_-1; ++i) {
1345 10902 : rank_size += recvcounts[i];
1346 10902 : disp[i+1] = rank_size;
1347 : }
1348 : // Allgather neighbor list
1349 16355 : comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
1350 : // now resize derivatives
1351 16355 : ovmd_der_.resize(tot_size);
1352 16355 : }
1353 :
1354 30 : void EMMI::prepare()
1355 : {
1356 30 : if(getExchangeStep()) first_time_=true;
1357 30 : }
1358 :
1359 : // overlap calculator
1360 16365 : void EMMI::calculate_overlap() {
1361 :
1362 16365 : if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
1363 16355 : update_neighbor_list();
1364 16355 : first_time_=false;
1365 : }
1366 :
1367 : // clean temporary std::vectors
1368 174342 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] = 0.0;
1369 3168864 : for(unsigned i=0; i<ovmd_der_.size(); ++i) ovmd_der_[i] = Vector(0,0,0);
1370 :
1371 : // we have to cycle over all model and data GMM components in the neighbor list
1372 16365 : unsigned GMM_d_size = GMM_d_m_.size();
1373 16365 : unsigned GMM_m_size = GMM_m_type_.size();
1374 2127723 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
1375 : // get data (id) and atom (im) indexes
1376 2111358 : unsigned id = nl_[i] / GMM_m_size;
1377 2111358 : unsigned im = nl_[i] % GMM_m_size;
1378 : // get index in auxiliary lists
1379 2111358 : unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
1380 : // add overlap with im component of model GMM
1381 2111358 : ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact_[kaux],
1382 2111358 : inv_cov_md_[kaux], ovmd_der_[i]);
1383 : }
1384 : // communicate stuff
1385 16365 : if(size_>1) {
1386 10902 : comm.Sum(&ovmd_[0], ovmd_.size());
1387 10902 : comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
1388 : }
1389 16365 : }
1390 :
1391 0 : double EMMI::scaleEnergy(double s)
1392 : {
1393 : double ene = 0.0;
1394 0 : for(unsigned i=0; i<ovdd_.size(); ++i) {
1395 0 : ene += std::log( abs ( s * ovmd_ave_[i] - ovdd_[i] ) );
1396 : }
1397 0 : return ene;
1398 : }
1399 :
1400 0 : double EMMI::doRegression()
1401 : {
1402 : // standard MC parameters
1403 : unsigned MCsteps = 100000;
1404 : double kbtmin = 1.0;
1405 : double kbtmax = 10.0;
1406 : unsigned ncold = 5000;
1407 : unsigned nhot = 2000;
1408 : double MCacc = 0.0;
1409 : double kbt, ebest, scale_best;
1410 :
1411 : // initial value of scale factor and energy
1412 0 : double scale = random_.RandU01() * ( scale_max_ - scale_min_ ) + scale_min_;
1413 0 : double ene = scaleEnergy(scale);
1414 : // set best energy
1415 0 : ebest = ene;
1416 :
1417 : // MC loop
1418 0 : for(unsigned istep=0; istep<MCsteps; ++istep) {
1419 : // get temperature
1420 0 : if(istep%(ncold+nhot)<ncold) kbt = kbtmin;
1421 : else kbt = kbtmax;
1422 : // propose move in scale
1423 0 : double ds = dscale_ * ( 2.0 * random_.RandU01() - 1.0 );
1424 0 : double new_scale = scale + ds;
1425 : // check boundaries
1426 0 : if(new_scale > scale_max_) {new_scale = 2.0 * scale_max_ - new_scale;}
1427 0 : if(new_scale < scale_min_) {new_scale = 2.0 * scale_min_ - new_scale;}
1428 : // new energy
1429 0 : double new_ene = scaleEnergy(new_scale);
1430 : // accept or reject
1431 0 : bool accept = doAccept(ene, new_ene, kbt);
1432 : // in case of acceptance
1433 0 : if(accept) {
1434 : scale = new_scale;
1435 : ene = new_ene;
1436 0 : MCacc += 1.0;
1437 : }
1438 : // save best
1439 0 : if(ene<ebest) {
1440 0 : ebest = ene;
1441 0 : scale_best = scale;
1442 : }
1443 : }
1444 : // calculate acceptance
1445 0 : double accscale = MCacc / static_cast<double>(MCsteps);
1446 : // global communication
1447 0 : if(!no_aver_ && nrep_>1) {
1448 0 : if(replica_!=0) {
1449 0 : scale_best = 0.0;
1450 0 : ebest = 0.0;
1451 0 : accscale = 0.0;
1452 : }
1453 0 : if(rank_==0) {
1454 0 : multi_sim_comm.Sum(&scale_best, 1);
1455 0 : multi_sim_comm.Sum(&ebest, 1);
1456 0 : multi_sim_comm.Sum(&accscale, 1);
1457 : }
1458 : }
1459 : // local communication
1460 0 : if(rank_!=0) {
1461 0 : scale_best = 0.0;
1462 0 : ebest = 0.0;
1463 0 : accscale = 0.0;
1464 : }
1465 0 : if(size_>1) {
1466 0 : comm.Sum(&scale_best, 1);
1467 0 : comm.Sum(&ebest, 1);
1468 0 : comm.Sum(&accscale, 1);
1469 : }
1470 : // set scale parameters
1471 0 : getPntrToComponent("accscale")->set(accscale);
1472 0 : getPntrToComponent("enescale")->set(ebest);
1473 : // return scale value
1474 0 : return scale_best;
1475 : }
1476 :
1477 0 : double EMMI::get_annealing(long int step)
1478 : {
1479 : // default no annealing
1480 : double fact = 1.0;
1481 : // position in annealing cycle
1482 0 : unsigned nc = step%(4*nanneal_);
1483 : // useful doubles
1484 0 : double ncd = static_cast<double>(nc);
1485 0 : double nn = static_cast<double>(nanneal_);
1486 : // set fact
1487 0 : if(nc>=nanneal_ && nc<2*nanneal_) fact = (kanneal_-1.0) / nn * ( ncd - nn ) + 1.0;
1488 0 : if(nc>=2*nanneal_ && nc<3*nanneal_) fact = kanneal_;
1489 0 : if(nc>=3*nanneal_) fact = (1.0-kanneal_) / nn * ( ncd - 3.0*nn) + kanneal_;
1490 0 : return fact;
1491 : }
1492 :
1493 16365 : void EMMI::get_weights(double &weight, double &norm, double &neff)
1494 : {
1495 16365 : const double dnrep = static_cast<double>(nrep_);
1496 : // calculate the weights either from BIAS
1497 16365 : if(do_reweight_) {
1498 12 : std::vector<double> bias(nrep_,0);
1499 12 : if(rank_==0) {
1500 12 : bias[replica_] = getArgument(0);
1501 12 : if(nrep_>1) multi_sim_comm.Sum(&bias[0], nrep_);
1502 : }
1503 12 : comm.Sum(&bias[0], nrep_);
1504 :
1505 : // accumulate weights
1506 12 : if(!first_time_w_) {
1507 30 : for(unsigned i=0; i<nrep_; ++i) {
1508 20 : const double delta=bias[i]-average_weights_[i];
1509 : // FIXME: multiplying by fractional decay here causes problems with numerical derivatives,
1510 : // probably because we're making several calls to calculate(), causing accumulation
1511 : // of epsilons. Maybe we can work on a temporary copy of the action instead?
1512 20 : average_weights_[i]+=decay_w_*delta;
1513 : }
1514 : } else {
1515 2 : first_time_w_ = false;
1516 6 : for(unsigned i=0; i<nrep_; ++i) average_weights_[i] = bias[i];
1517 : }
1518 :
1519 : // set average back into bias and set norm to one
1520 12 : const double maxbias = *(std::max_element(average_weights_.begin(), average_weights_.end()));
1521 36 : for(unsigned i=0; i<nrep_; ++i) bias[i] = std::exp((average_weights_[i]-maxbias)/kbt_);
1522 : // set local weight, norm and weight variance
1523 12 : weight = bias[replica_];
1524 : double w2=0.;
1525 36 : for(unsigned i=0; i<nrep_; ++i) {
1526 24 : w2 += bias[i]*bias[i];
1527 24 : norm += bias[i];
1528 : }
1529 12 : neff = norm*norm/w2;
1530 24 : getPntrToComponent("weight")->set(weight/norm);
1531 : } else {
1532 : // or arithmetic ones
1533 16353 : neff = dnrep;
1534 16353 : weight = 1.0;
1535 16353 : norm = dnrep;
1536 : }
1537 16365 : getPntrToComponent("neff")->set(neff);
1538 16365 : }
1539 :
1540 16365 : void EMMI::calculate()
1541 : {
1542 :
1543 : // calculate CV
1544 16365 : calculate_overlap();
1545 :
1546 : // rescale factor for ensemble average
1547 16365 : double weight = 0.;
1548 16365 : double neff = 0.;
1549 16365 : double norm = 0.;
1550 16365 : get_weights(weight, norm, neff);
1551 :
1552 : // in case of ensemble averaging, calculate average overlap
1553 16365 : if(!no_aver_ && nrep_>1) {
1554 : // if master node, calculate average across replicas
1555 12 : if(rank_==0) {
1556 10812 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] = weight / norm * ovmd_[i];
1557 12 : multi_sim_comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
1558 : } else {
1559 0 : for(unsigned i=0; i<ovmd_ave_.size(); ++i) ovmd_ave_[i] = 0.0;
1560 : }
1561 : // local communication
1562 12 : if(size_>1) comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
1563 : } else {
1564 163530 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] = ovmd_[i];
1565 : }
1566 :
1567 : // get time step
1568 16365 : long int step = getStep();
1569 :
1570 : // do regression
1571 16365 : if(nregres_>0) {
1572 0 : if(step%nregres_==0 && !getExchangeStep()) scale_ = doRegression();
1573 : // set scale component
1574 0 : getPntrToComponent("scale")->set(scale_);
1575 : }
1576 :
1577 : // write model overlap to file
1578 16365 : if(ovstride_>0 && step%ovstride_==0) write_model_overlap(step);
1579 :
1580 : // clear energy and virial
1581 16365 : ene_ = 0.0;
1582 16365 : virial_.zero();
1583 :
1584 : // Gaussian noise
1585 16365 : if(noise_==0) calculate_Gauss();
1586 :
1587 : // Outliers noise
1588 16365 : if(noise_==1) calculate_Outliers();
1589 :
1590 : // Marginal noise
1591 16365 : if(noise_==2) calculate_Marginal();
1592 :
1593 : // get annealing rescale factor
1594 16365 : if(nanneal_>0) {
1595 0 : anneal_ = get_annealing(step);
1596 0 : getPntrToComponent("anneal")->set(anneal_);
1597 : }
1598 :
1599 : // annealing rescale
1600 16365 : ene_ /= anneal_;
1601 :
1602 16365 : std::vector<double> GMMid_der_av_(GMMid_der_.size(), 0.0);
1603 : // in case of ensemble averaging
1604 16365 : if(!no_aver_ && nrep_>1) {
1605 : // if master node, sum der_GMMid derivatives and ene
1606 12 : if(rank_==0) {
1607 10812 : for(unsigned i=0; i<GMMid_der_.size(); ++i) {
1608 10800 : GMMid_der_av_[i] = (weight / norm) * GMMid_der_[i];
1609 : }
1610 12 : multi_sim_comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
1611 12 : multi_sim_comm.Sum(&ene_, 1);
1612 : } else {
1613 : // set der_GMMid derivatives and energy to zero
1614 0 : for(unsigned i=0; i<GMMid_der_av_.size(); ++i) GMMid_der_av_[i]=0.0;
1615 0 : ene_ = 0.0;
1616 : }
1617 : // local communication
1618 12 : if(size_>1) {
1619 0 : comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
1620 0 : comm.Sum(&ene_, 1);
1621 : }
1622 : } else {
1623 163530 : for (unsigned i = 0; i < GMMid_der_.size(); ++i) {
1624 147177 : GMMid_der_av_[i] = GMMid_der_[i];
1625 : }
1626 : }
1627 :
1628 : // clean temporary std::vector
1629 9860991 : for(unsigned i=0; i<atom_der_.size(); ++i) atom_der_[i] = Vector(0,0,0);
1630 :
1631 : // get derivatives of bias with respect to atoms
1632 2127723 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
1633 : // get indexes of data and model component
1634 2111358 : unsigned id = nl_[i] / GMM_m_type_.size();
1635 2111358 : unsigned im = nl_[i] % GMM_m_type_.size();
1636 : // chain rule + replica normalization
1637 2111358 : Vector tot_der = GMMid_der_av_[id] * ovmd_der_[i] * scale_ / anneal_;
1638 2111358 : Vector pos;
1639 2111358 : if(pbc_) pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
1640 2111358 : else pos = getPosition(im);
1641 : // increment derivatives and virial
1642 2111358 : atom_der_[im] += tot_der;
1643 2111358 : virial_ += Tensor(pos, -tot_der);
1644 : }
1645 :
1646 : // communicate local derivatives and virial
1647 16365 : if(size_>1) {
1648 10902 : comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
1649 10902 : comm.Sum(virial_);
1650 : }
1651 :
1652 : // set derivatives, virial, and score
1653 9860991 : for(unsigned i=0; i<atom_der_.size(); ++i) setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_[i]);
1654 16365 : setBoxDerivatives(getPntrToComponent("scoreb"), virial_);
1655 16365 : getPntrToComponent("scoreb")->set(ene_);
1656 :
1657 16365 : if (do_reweight_) {
1658 : double w_tmp = 0.;
1659 10812 : for (unsigned i = 0; i < ovmd_.size(); ++i) {
1660 10800 : w_tmp += (ovmd_[i] - ovmd_ave_[i]) * GMMid_der_[i];
1661 : }
1662 12 : w_tmp *= scale_ * (weight / norm) / kbt_ * decay_w_;
1663 :
1664 12 : setArgDerivatives(getPntrToComponent("scoreb"), w_tmp);
1665 24 : getPntrToComponent("biasDer")->set(w_tmp);
1666 : }
1667 :
1668 : // This part is needed only for Gaussian and Outliers noise models
1669 16365 : if(noise_!=2) {
1670 :
1671 : // do Montecarlo
1672 10914 : if(dsigma_[0]>0 && step%MCstride_==0 && !getExchangeStep()) doMonteCarlo();
1673 :
1674 : // print status
1675 10914 : if(step%statusstride_==0) print_status(step);
1676 :
1677 : // calculate acceptance ratio
1678 10914 : double acc = MCaccept_ / MCtrials_;
1679 :
1680 : // set value
1681 21828 : getPntrToComponent("acc")->set(acc);
1682 :
1683 : }
1684 :
1685 16365 : }
1686 :
1687 5463 : void EMMI::calculate_Gauss()
1688 : {
1689 : // cycle on all the GMM groups
1690 10926 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1691 : double eneg = 0.0;
1692 : // cycle on all the members of the group
1693 65322 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1694 : // id of the GMM component
1695 59859 : int GMMid = GMM_d_grps_[i][j];
1696 : // calculate deviation
1697 59859 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
1698 : // add to group energy
1699 59859 : eneg += 0.5 * dev * dev;
1700 : // store derivative for later
1701 59859 : GMMid_der_[GMMid] = kbt_ * dev / sigma_[i];
1702 : }
1703 : // add to total energy along with normalizations and prior
1704 5463 : ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
1705 : }
1706 5463 : }
1707 :
1708 5451 : void EMMI::calculate_Outliers()
1709 : {
1710 : // cycle on all the GMM groups
1711 10902 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1712 : // cycle on all the members of the group
1713 : double eneg = 0.0;
1714 54510 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1715 : // id of the GMM component
1716 49059 : int GMMid = GMM_d_grps_[i][j];
1717 : // calculate deviation
1718 49059 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
1719 : // add to group energy
1720 49059 : eneg += std::log( 1.0 + 0.5 * dev * dev );
1721 : // store derivative for later
1722 49059 : GMMid_der_[GMMid] = kbt_ / ( 1.0 + 0.5 * dev * dev ) * dev / sigma_[i];
1723 : }
1724 : // add to total energy along with normalizations and prior
1725 5451 : ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
1726 : }
1727 5451 : }
1728 :
1729 5451 : void EMMI::calculate_Marginal()
1730 : {
1731 : // cycle on all the GMM groups
1732 10902 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1733 : // cycle on all the members of the group
1734 54510 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1735 : // id of the GMM component
1736 49059 : int GMMid = GMM_d_grps_[i][j];
1737 : // calculate deviation
1738 49059 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
1739 : // calculate errf
1740 49059 : double errf = erf ( dev * inv_sqrt2_ / sigma_min_[i] );
1741 : // add to group energy
1742 49059 : ene_ += -kbt_ * std::log ( 0.5 / dev * errf ) ;
1743 : // store derivative for later
1744 49059 : GMMid_der_[GMMid] = - kbt_/errf*sqrt2_pi_*std::exp(-0.5*dev*dev/sigma_min_[i]/sigma_min_[i])/sigma_min_[i]+kbt_/dev;
1745 : }
1746 : }
1747 5451 : }
1748 :
1749 : }
1750 : }
|