Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2017-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionWithValue.h"
23 : #include "core/ActionAtomistic.h"
24 : #include "core/ActionWithArguments.h"
25 : #include "core/ActionRegister.h"
26 : #include "core/PlumedMain.h"
27 : #include "tools/Matrix.h"
28 : #include "tools/Communicator.h"
29 : #include "core/GenericMolInfo.h"
30 : #include "core/ActionSet.h"
31 : #include "tools/File.h"
32 : #include "tools/Random.h"
33 :
34 : #include <string>
35 : #include <map>
36 : #include <numeric>
37 : #include <ctime>
38 :
39 : namespace PLMD {
40 : namespace isdb {
41 :
42 : //+PLUMEDOC ISDB_COLVAR EMMI
43 : /*
44 : Calculate the fit of a structure or ensemble of structures with a cryo-EM density map.
45 :
46 : This action implements the multi-scale Bayesian approach to cryo-EM data fitting introduced in Ref. \cite Hanot113951 .
47 : This method allows efficient and accurate structural modeling of cryo-electron microscopy density maps at multiple scales, from coarse-grained to atomistic resolution, by addressing the presence of random and systematic errors in the data, sample heterogeneity, data correlation, and noise correlation.
48 :
49 : The experimental density map is fit by a Gaussian Mixture Model (GMM), which is provided as an external file specified by the keyword
50 : GMM_FILE. We are currently working on a web server to perform
51 : this operation. In the meantime, the user can request a stand-alone version of the GMM code at massimiliano.bonomi_AT_gmail.com.
52 :
53 : When run in single-replica mode, this action allows atomistic, flexible refinement of an individual structure into a density map.
54 : Combined with a multi-replica framework (such as the -multi option in GROMACS), the user can model an ensemble of structures using
55 : the Metainference approach \cite Bonomi:2016ip .
56 :
57 : \warning
58 : To use \ref EMMI, the user should always add a \ref MOLINFO line and specify a pdb file of the system.
59 :
60 : \note
61 : To enhance sampling in single-structure refinement, one can use a Replica Exchange Method, such as Parallel Tempering.
62 : In this case, the user should add the NO_AVER flag to the input line. To use a replica-based enhanced sampling scheme such as
63 : Parallel-Bias Metadynamics (\ref PBMETAD), one should use the REWEIGHT flag and pass the Metadynamics bias using the ARG keyword.
64 :
65 : \note
66 : \ref EMMI can be used in combination with periodic and non-periodic systems. In the latter case, one should
67 : add the NOPBC flag to the input line
68 :
69 : \par Examples
70 :
71 : In this example, we perform a single-structure refinement based on an experimental cryo-EM map. The map is fit with a GMM, whose
72 : parameters are listed in the file GMM_fit.dat. This file contains one line per GMM component in the following format:
73 :
74 : \plumedfile
75 : #! FIELDS Id Weight Mean_0 Mean_1 Mean_2 Cov_00 Cov_01 Cov_02 Cov_11 Cov_12 Cov_22 Beta
76 : 0 2.9993805e+01 6.54628 10.37820 -0.92988 2.078920e-02 1.216254e-03 5.990827e-04 2.556246e-02 8.411835e-03 2.486254e-02 1
77 : 1 2.3468312e+01 6.56095 10.34790 -0.87808 1.879859e-02 6.636049e-03 3.682865e-04 3.194490e-02 1.750524e-03 3.017100e-02 1
78 : ...
79 : \endplumedfile
80 :
81 : To accelerate the computation of the Bayesian score, one can:
82 : - use neighbor lists, specified by the keywords NL_CUTOFF and NL_STRIDE;
83 : - calculate the restraint every other step (or more).
84 :
85 : All the heavy atoms of the system are used to calculate the density map. This list can conveniently be provided
86 : using a GROMACS index file.
87 :
88 : The input file looks as follows:
89 :
90 : \plumedfile
91 : # include pdb info
92 : MOLINFO STRUCTURE=prot.pdb
93 :
94 : # all heavy atoms
95 : protein-h: GROUP NDX_FILE=index.ndx NDX_GROUP=Protein-H
96 :
97 : # create EMMI score
98 : gmm: EMMI NOPBC SIGMA_MIN=0.01 TEMP=300.0 NL_STRIDE=100 NL_CUTOFF=0.01 GMM_FILE=GMM_fit.dat ATOMS=protein-h
99 :
100 : # translate into bias - apply every 2 steps
101 : emr: BIASVALUE ARG=gmm.scoreb STRIDE=2
102 :
103 : PRINT ARG=emr.* FILE=COLVAR STRIDE=500 FMT=%20.10f
104 : \endplumedfile
105 :
106 :
107 : */
108 : //+ENDPLUMEDOC
109 :
110 : class EMMI :
111 : public ActionAtomistic,
112 : public ActionWithArguments,
113 : public ActionWithValue
114 : {
115 : private:
116 :
117 : // temperature in kbt
118 : double kbt_;
119 : // model GMM - atom types
120 : std::vector<unsigned> GMM_m_type_;
121 : // model GMM - list of atom sigmas - one per atom type
122 : std::vector<double> GMM_m_s_;
123 : // model GMM - list of atom weights - one per atom type
124 : std::vector<double> GMM_m_w_;
125 : // data GMM - means, weights, and covariances + beta option
126 : std::vector<Vector> GMM_d_m_;
127 : std::vector<double> GMM_d_w_;
128 : std::vector< VectorGeneric<6> > GMM_d_cov_;
129 : std::vector<int> GMM_d_beta_;
130 : std::vector < std::vector<int> > GMM_d_grps_;
131 : // overlaps
132 : std::vector<double> ovmd_;
133 : std::vector<double> ovdd_;
134 : std::vector<double> ovmd_ave_;
135 : // and derivatives
136 : std::vector<Vector> ovmd_der_;
137 : std::vector<Vector> atom_der_;
138 : std::vector<double> GMMid_der_;
139 : // constants
140 : double cfact_;
141 : double inv_sqrt2_, sqrt2_pi_;
142 : // metainference
143 : unsigned nrep_;
144 : unsigned replica_;
145 : std::vector<double> sigma_;
146 : std::vector<double> sigma_min_;
147 : std::vector<double> sigma_max_;
148 : std::vector<double> dsigma_;
149 : // list of prefactors for overlap between two Gaussians
150 : // pre_fact = 1.0 / (2pi)**1.5 / std::sqrt(det_md) * Wm * Wd
151 : std::vector<double> pre_fact_;
152 : // inverse of the sum of model and data covariances matrices
153 : std::vector< VectorGeneric<6> > inv_cov_md_;
154 : // neighbor list
155 : double nl_cutoff_;
156 : unsigned nl_stride_;
157 : bool first_time_;
158 : bool no_aver_;
159 : std::vector<unsigned> nl_;
160 : // parallel stuff
161 : unsigned size_;
162 : unsigned rank_;
163 : // pbc
164 : bool pbc_;
165 : // Monte Carlo stuff
166 : int MCstride_;
167 : double MCaccept_;
168 : double MCtrials_;
169 : Random random_;
170 : // status stuff
171 : unsigned int statusstride_;
172 : std::string statusfilename_;
173 : OFile statusfile_;
174 : bool first_status_;
175 : // regression
176 : unsigned nregres_;
177 : double scale_;
178 : double scale_min_;
179 : double scale_max_;
180 : double dscale_;
181 : // tabulated exponential
182 : double dpcutoff_;
183 : double dexp_;
184 : unsigned nexp_;
185 : std::vector<double> tab_exp_;
186 : // simulated annealing
187 : unsigned nanneal_;
188 : double kanneal_;
189 : double anneal_;
190 : // prior exponent
191 : double prior_;
192 : // noise type
193 : unsigned noise_;
194 : // total score and virial;
195 : double ene_;
196 : Tensor virial_;
197 : // model overlap file
198 : unsigned int ovstride_;
199 : std::string ovfilename_;
200 :
201 : // Reweighting additions
202 : bool do_reweight_;
203 : bool first_time_w_;
204 : std::vector<double> forces;
205 : std::vector<double> forcesToApply;
206 :
207 : // average weights
208 : double decay_w_;
209 : std::vector<double> average_weights_;
210 :
211 : // write file with model overlap
212 : void write_model_overlap(long long int step);
213 : // get median of std::vector
214 : double get_median(std::vector<double> &v);
215 : // annealing
216 : double get_annealing(long long int step);
217 : // do regression
218 : double scaleEnergy(double s);
219 : double doRegression();
220 : // read and write status
221 : void read_status();
222 : void print_status(long long int step);
223 : // accept or reject
224 : bool doAccept(double oldE, double newE, double kbt);
225 : // do MonteCarlo
226 : void doMonteCarlo();
227 : // read error file
228 : std::vector<double> read_exp_errors(const std::string & errfile);
229 : // read experimental overlaps
230 : std::vector<double> read_exp_overlaps(const std::string & ovfile);
231 : // calculate model GMM weights and covariances
232 : std::vector<double> get_GMM_m(std::vector<AtomNumber> &atoms);
233 : // read data GMM file
234 : void get_GMM_d(const std::string & gmm_file);
235 : // check GMM data
236 : void check_GMM_d(const VectorGeneric<6> &cov, double w);
237 : // auxiliary method
238 : void calculate_useful_stuff(double reso);
239 : // get pref_fact and inv_cov_md
240 : double get_prefactor_inverse (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
241 : double GMM_w_0, double GMM_w_1,
242 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum);
243 : // calculate self overlaps between data GMM components - ovdd_
244 : double get_self_overlap(unsigned id);
245 : // calculate overlap between two Gaussians
246 : double get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
247 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der);
248 : // calculate exponent of overlap for neighbor list update
249 : double get_exp_overlap(const Vector &m_m, const Vector &d_m,
250 : const VectorGeneric<6> &inv_cov_md);
251 : // update the neighbor list
252 : void update_neighbor_list();
253 : // calculate overlap
254 : void calculate_overlap();
255 : // Gaussian noise
256 : void calculate_Gauss();
257 : // Outliers noise
258 : void calculate_Outliers();
259 : // Marginal noise
260 : void calculate_Marginal();
261 :
262 : // See MetainferenceBase
263 : void get_weights(double &weight, double &norm, double &neff);
264 :
265 : public:
266 : static void registerKeywords( Keywords& keys );
267 : explicit EMMI(const ActionOptions&);
268 : // needed for reweighting
269 : void setDerivatives();
270 : void turnOnDerivatives() override;
271 : unsigned getNumberOfDerivatives() override;
272 : void lockRequests() override;
273 : void unlockRequests() override;
274 : void calculateNumericalDerivatives( ActionWithValue* a ) override;
275 : void apply() override;
276 : void setArgDerivatives(Value *v, const double &d);
277 : void setAtomsDerivatives(Value*v, const unsigned i, const Vector&d);
278 : void setBoxDerivatives(Value*v, const Tensor&d);
279 : // active methods:
280 : void prepare() override;
281 : void calculate() override;
282 : };
283 :
284 : inline
285 20 : void EMMI::setDerivatives() {
286 : // Get appropriate number of derivatives
287 : // Derivatives are first for arguments and then for atoms
288 : unsigned nder;
289 20 : if( getNumberOfAtoms()>0 ) {
290 20 : nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
291 : } else {
292 0 : nder = getNumberOfArguments();
293 : }
294 :
295 : // Resize all derivative arrays
296 20 : forces.resize( nder ); forcesToApply.resize( nder );
297 92 : for(int i=0; i<getNumberOfComponents(); ++i) getPntrToComponent(i)->resizeDerivatives(nder);
298 20 : }
299 :
300 : inline
301 20 : void EMMI::turnOnDerivatives() {
302 20 : ActionWithValue::turnOnDerivatives();
303 20 : }
304 :
305 : inline
306 72 : unsigned EMMI::getNumberOfDerivatives() {
307 72 : if( getNumberOfAtoms()>0 ) {
308 72 : return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
309 : }
310 0 : return getNumberOfArguments();
311 : }
312 :
313 : inline
314 30 : void EMMI::lockRequests() {
315 : ActionAtomistic::lockRequests();
316 : ActionWithArguments::lockRequests();
317 30 : }
318 :
319 : inline
320 30 : void EMMI::unlockRequests() {
321 : ActionAtomistic::unlockRequests();
322 : ActionWithArguments::unlockRequests();
323 30 : }
324 :
325 : inline
326 9 : void EMMI::calculateNumericalDerivatives( ActionWithValue* a=NULL ) {
327 9 : if( getNumberOfArguments()>0 ) {
328 0 : ActionWithArguments::calculateNumericalDerivatives( a );
329 : }
330 9 : if( getNumberOfAtoms()>0 ) {
331 9 : Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
332 39 : for(int j=0; j<getNumberOfComponents(); ++j) {
333 30 : for(unsigned i=0; i<getNumberOfArguments(); ++i) if(getPntrToComponent(j)->hasDerivatives()) save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
334 : }
335 9 : calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
336 39 : for(int j=0; j<getNumberOfComponents(); ++j) {
337 30 : for(unsigned i=0; i<getNumberOfArguments(); ++i) if(getPntrToComponent(j)->hasDerivatives()) getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
338 : }
339 : }
340 9 : }
341 :
342 : inline
343 30 : void EMMI::apply() {
344 30 : bool wasforced=false; forcesToApply.assign(forcesToApply.size(),0.0);
345 162 : for(int i=0; i<getNumberOfComponents(); ++i) {
346 132 : if( getPntrToComponent(i)->applyForce( forces ) ) {
347 : wasforced=true;
348 0 : for(unsigned i=0; i<forces.size(); ++i) forcesToApply[i]+=forces[i];
349 : }
350 : }
351 30 : if( wasforced ) {
352 0 : unsigned ind=0; addForcesOnArguments( 0, forcesToApply, ind, getLabel() );
353 0 : if( getNumberOfAtoms()>0 ) setForcesOnAtoms( forcesToApply, ind );
354 : }
355 30 : }
356 :
357 : inline
358 : void EMMI::setArgDerivatives(Value *v, const double &d) {
359 : v->addDerivative(0,d);
360 : }
361 :
362 : inline
363 9844626 : void EMMI::setAtomsDerivatives(Value*v, const unsigned i, const Vector&d) {
364 9844626 : const unsigned noa=getNumberOfArguments();
365 9844626 : v->addDerivative(noa+3*i+0,d[0]);
366 9844626 : v->addDerivative(noa+3*i+1,d[1]);
367 9844626 : v->addDerivative(noa+3*i+2,d[2]);
368 9844626 : }
369 :
370 : inline
371 16365 : void EMMI::setBoxDerivatives(Value* v,const Tensor&d) {
372 16365 : const unsigned noa=getNumberOfArguments();
373 : const unsigned nat=getNumberOfAtoms();
374 16365 : v->addDerivative(noa+3*nat+0,d(0,0));
375 16365 : v->addDerivative(noa+3*nat+1,d(0,1));
376 16365 : v->addDerivative(noa+3*nat+2,d(0,2));
377 16365 : v->addDerivative(noa+3*nat+3,d(1,0));
378 16365 : v->addDerivative(noa+3*nat+4,d(1,1));
379 16365 : v->addDerivative(noa+3*nat+5,d(1,2));
380 16365 : v->addDerivative(noa+3*nat+6,d(2,0));
381 16365 : v->addDerivative(noa+3*nat+7,d(2,1));
382 16365 : v->addDerivative(noa+3*nat+8,d(2,2));
383 16365 : }
384 :
385 : PLUMED_REGISTER_ACTION(EMMI,"EMMI")
386 :
387 22 : void EMMI::registerKeywords( Keywords& keys ) {
388 22 : Action::registerKeywords( keys );
389 22 : ActionAtomistic::registerKeywords( keys );
390 22 : ActionWithValue::registerKeywords( keys );
391 22 : ActionWithArguments::registerKeywords( keys );
392 44 : keys.addInputKeyword("optional","ARG","scalar","the labels of the values from which the function is calculated");
393 44 : keys.add("atoms","ATOMS","atoms for which we calculate the density map, typically all heavy atoms");
394 44 : keys.addFlag("NOPBC",false,"ignore the periodic boundary conditions when calculating distances");
395 44 : keys.add("compulsory","GMM_FILE","file with the parameters of the GMM components");
396 44 : keys.add("compulsory","NL_CUTOFF","The cutoff in overlap for the neighbor list");
397 44 : keys.add("compulsory","NL_STRIDE","The frequency with which we are updating the neighbor list");
398 44 : keys.add("compulsory","SIGMA_MIN","minimum uncertainty");
399 44 : keys.add("compulsory","RESOLUTION", "Cryo-EM map resolution");
400 44 : keys.add("compulsory","NOISETYPE","functional form of the noise (GAUSS, OUTLIERS, MARGINAL)");
401 44 : keys.add("optional","SIGMA0","initial value of the uncertainty");
402 44 : keys.add("optional","DSIGMA","MC step for uncertainties");
403 44 : keys.add("optional","MC_STRIDE", "Monte Carlo stride");
404 44 : keys.add("optional","ERR_FILE","file with experimental or GMM fit errors");
405 44 : keys.add("optional","OV_FILE","file with experimental overlaps");
406 44 : keys.add("optional","NORM_DENSITY","integral of the experimental density");
407 44 : keys.add("optional","STATUS_FILE","write a file with all the data useful for restart");
408 44 : keys.add("optional","WRITE_STRIDE","write the status to a file every N steps, this can be used for restart");
409 44 : keys.add("optional","REGRESSION","regression stride");
410 44 : keys.add("optional","REG_SCALE_MIN","regression minimum scale");
411 44 : keys.add("optional","REG_SCALE_MAX","regression maximum scale");
412 44 : keys.add("optional","REG_DSCALE","regression maximum scale MC move");
413 44 : keys.add("optional","SCALE","scale factor");
414 44 : keys.add("optional","ANNEAL", "Length of annealing cycle");
415 44 : keys.add("optional","ANNEAL_FACT", "Annealing temperature factor");
416 44 : keys.add("optional","TEMP","temperature");
417 44 : keys.add("optional","PRIOR", "exponent of uncertainty prior");
418 44 : keys.add("optional","WRITE_OV_STRIDE","write model overlaps every N steps");
419 44 : keys.add("optional","WRITE_OV","write a file with model overlaps");
420 44 : keys.add("optional","AVERAGING", "Averaging window for weights");
421 44 : keys.addFlag("NO_AVER",false,"don't do ensemble averaging in multi-replica mode");
422 44 : keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the ARG as energy");
423 44 : keys.addOutputComponent("scoreb","default","scalar","Bayesian score");
424 44 : keys.addOutputComponent("acc", "NOISETYPE","scalar","MC acceptance for uncertainty");
425 44 : keys.addOutputComponent("scale", "REGRESSION","scalar","scale factor");
426 44 : keys.addOutputComponent("accscale", "REGRESSION","scalar","MC acceptance for scale regression");
427 44 : keys.addOutputComponent("enescale", "REGRESSION","scalar","MC energy for scale regression");
428 44 : keys.addOutputComponent("anneal","ANNEAL","scalar","annealing factor");
429 44 : keys.addOutputComponent("weight", "REWEIGHT","scalar", "weights of the weighted average");
430 44 : keys.addOutputComponent("biasDer", "REWEIGHT","scalar", "derivatives with respect to the bias");
431 44 : keys.addOutputComponent("sigma", "NOISETYPE","scalar", "uncertainty in the forward models and experiment");
432 44 : keys.addOutputComponent("neff", "default","scalar", "effective number of replicas");
433 22 : }
434 :
435 20 : EMMI::EMMI(const ActionOptions&ao):
436 : Action(ao),
437 : ActionAtomistic(ao),
438 : ActionWithArguments(ao),
439 : ActionWithValue(ao),
440 20 : inv_sqrt2_(0.707106781186548),
441 20 : sqrt2_pi_(0.797884560802865),
442 20 : first_time_(true), no_aver_(false), pbc_(true),
443 20 : MCstride_(1), MCaccept_(0.), MCtrials_(0.),
444 40 : statusstride_(0), first_status_(true),
445 20 : nregres_(0), scale_(1.),
446 20 : dpcutoff_(15.0), nexp_(1000000), nanneal_(0),
447 40 : kanneal_(0.), anneal_(1.), prior_(1.), ovstride_(0),
448 20 : do_reweight_(false), first_time_w_(true), decay_w_(1.)
449 : {
450 : // periodic boundary conditions
451 20 : bool nopbc=!pbc_;
452 20 : parseFlag("NOPBC",nopbc);
453 20 : pbc_=!nopbc;
454 :
455 : // list of atoms
456 : std::vector<AtomNumber> atoms;
457 40 : parseAtomList("ATOMS", atoms);
458 :
459 : // file with data GMM
460 : std::string GMM_file;
461 40 : parse("GMM_FILE", GMM_file);
462 :
463 : // type of data noise
464 : std::string noise;
465 40 : parse("NOISETYPE",noise);
466 20 : if (noise=="GAUSS") noise_ = 0;
467 12 : else if(noise=="OUTLIERS") noise_ = 1;
468 6 : else if(noise=="MARGINAL") noise_ = 2;
469 0 : else error("Unknown noise type!");
470 :
471 : // minimum value for error
472 : double sigma_min;
473 20 : parse("SIGMA_MIN", sigma_min);
474 20 : if(sigma_min<0) error("SIGMA_MIN should be greater or equal to zero");
475 :
476 : // the following parameters must be specified with noise type 0 and 1
477 : double sigma_ini, dsigma;
478 20 : if(noise_!=2) {
479 : // initial value of the uncertainty
480 14 : parse("SIGMA0", sigma_ini);
481 14 : if(sigma_ini<=0) error("you must specify a positive SIGMA0");
482 : // MC parameters
483 14 : parse("DSIGMA", dsigma);
484 14 : if(dsigma<0) error("you must specify a positive DSIGMA");
485 14 : parse("MC_STRIDE", MCstride_);
486 14 : if(dsigma>0 && MCstride_<=0) error("you must specify a positive MC_STRIDE");
487 : // status file parameters
488 14 : parse("WRITE_STRIDE", statusstride_);
489 14 : if(statusstride_==0) error("you must specify a positive WRITE_STRIDE");
490 28 : parse("STATUS_FILE", statusfilename_);
491 28 : if(statusfilename_=="") statusfilename_ = "MISTATUS"+getLabel();
492 0 : else statusfilename_ = statusfilename_+getLabel();
493 : }
494 :
495 : // error file
496 : std::string errfile;
497 40 : parse("ERR_FILE", errfile);
498 :
499 : // file with experimental overlaps
500 : std::string ovfile;
501 20 : parse("OV_FILE", ovfile);
502 :
503 : // integral of the experimetal density
504 20 : double norm_d = 0.0;
505 20 : parse("NORM_DENSITY", norm_d);
506 :
507 : // temperature
508 20 : kbt_ = getkBT();
509 :
510 : // exponent of uncertainty prior
511 20 : parse("PRIOR",prior_);
512 :
513 : // simulated annealing stuff
514 20 : parse("ANNEAL", nanneal_);
515 20 : parse("ANNEAL_FACT", kanneal_);
516 20 : if(nanneal_>0 && kanneal_<=1.0) error("with ANNEAL, ANNEAL_FACT must be greater than 1");
517 :
518 : // regression stride
519 20 : parse("REGRESSION",nregres_);
520 : // other regression parameters
521 20 : if(nregres_>0) {
522 0 : parse("REG_SCALE_MIN",scale_min_);
523 0 : parse("REG_SCALE_MAX",scale_max_);
524 0 : parse("REG_DSCALE",dscale_);
525 : // checks
526 0 : if(scale_max_<=scale_min_) error("with REGRESSION, REG_SCALE_MAX must be greater than REG_SCALE_MIN");
527 0 : if(dscale_<=0.) error("with REGRESSION, REG_DSCALE must be positive");
528 : }
529 :
530 : // scale factor
531 20 : parse("SCALE", scale_);
532 :
533 : // read map resolution
534 : double reso;
535 20 : parse("RESOLUTION", reso);
536 20 : if(reso<=0.) error("RESOLUTION should be strictly positive");
537 :
538 : // neighbor list stuff
539 20 : parse("NL_CUTOFF",nl_cutoff_);
540 20 : if(nl_cutoff_<=0.0) error("NL_CUTOFF should be explicitly specified and positive");
541 20 : parse("NL_STRIDE",nl_stride_);
542 20 : if(nl_stride_==0) error("NL_STRIDE should be explicitly specified and positive");
543 :
544 : // averaging or not
545 20 : parseFlag("NO_AVER",no_aver_);
546 :
547 : // write overlap file
548 20 : parse("WRITE_OV_STRIDE", ovstride_);
549 20 : parse("WRITE_OV", ovfilename_);
550 20 : if(ovstride_>0 && ovfilename_=="") error("With WRITE_OV_STRIDE you must specify WRITE_OV");
551 :
552 : // set parallel stuff
553 20 : size_=comm.Get_size();
554 20 : rank_=comm.Get_rank();
555 :
556 : // get number of replicas
557 20 : if(rank_==0) {
558 14 : if(no_aver_) {
559 12 : nrep_ = 1;
560 : } else {
561 2 : nrep_ = multi_sim_comm.Get_size();
562 : }
563 14 : replica_ = multi_sim_comm.Get_rank();
564 : } else {
565 6 : nrep_ = 0;
566 6 : replica_ = 0;
567 : }
568 20 : comm.Sum(&nrep_,1);
569 20 : comm.Sum(&replica_,1);
570 :
571 : // Reweighting flag
572 20 : parseFlag("REWEIGHT", do_reweight_);
573 20 : if(do_reweight_&&getNumberOfArguments()!=1) error("To REWEIGHT one must provide one single bias as an argument");
574 20 : if(do_reweight_&&no_aver_) error("REWEIGHT cannot be used with NO_AVER");
575 20 : if(do_reweight_&&nrep_<2) error("REWEIGHT can only be used in parallel with 2 or more replicas");
576 20 : if(!getRestart()) average_weights_.resize(nrep_, 1./static_cast<double>(nrep_));
577 0 : else average_weights_.resize(nrep_, 0.);
578 :
579 20 : unsigned averaging=0;
580 20 : parse("AVERAGING", averaging);
581 20 : if(averaging>0) {
582 2 : decay_w_ = 1./static_cast<double> (averaging);
583 : }
584 :
585 20 : checkRead();
586 :
587 20 : log.printf(" atoms involved : ");
588 10876 : for(unsigned i=0; i<atoms.size(); ++i) log.printf("%d ",atoms[i].serial());
589 20 : log.printf("\n");
590 20 : log.printf(" GMM data file : %s\n", GMM_file.c_str());
591 20 : if(no_aver_) log.printf(" without ensemble averaging\n");
592 20 : log.printf(" type of data noise : %s\n", noise.c_str());
593 20 : log.printf(" neighbor list cutoff : %lf\n", nl_cutoff_);
594 20 : log.printf(" neighbor list stride : %u\n", nl_stride_);
595 20 : log.printf(" minimum uncertainty : %f\n",sigma_min);
596 20 : log.printf(" scale factor : %lf\n",scale_);
597 20 : if(nregres_>0) {
598 0 : log.printf(" regression stride : %u\n", nregres_);
599 0 : log.printf(" regression minimum scale : %lf\n", scale_min_);
600 0 : log.printf(" regression maximum scale : %lf\n", scale_max_);
601 0 : log.printf(" regression maximum scale MC move : %lf\n", dscale_);
602 : }
603 20 : if(noise_!=2) {
604 14 : log.printf(" initial value of the uncertainty : %f\n",sigma_ini);
605 14 : log.printf(" max MC move in uncertainty : %f\n",dsigma);
606 14 : log.printf(" MC stride : %u\n", MCstride_);
607 14 : log.printf(" reading/writing to status file : %s\n",statusfilename_.c_str());
608 14 : log.printf(" with stride : %u\n",statusstride_);
609 : }
610 20 : if(errfile.size()>0) log.printf(" reading experimental errors from file : %s\n", errfile.c_str());
611 20 : if(ovfile.size()>0) log.printf(" reading experimental overlaps from file : %s\n", ovfile.c_str());
612 20 : log.printf(" temperature of the system in energy unit : %f\n",kbt_);
613 20 : log.printf(" prior exponent : %f\n",prior_);
614 20 : log.printf(" number of replicas for averaging: %u\n",nrep_);
615 20 : log.printf(" id of the replica : %u\n",replica_);
616 20 : if(nanneal_>0) {
617 0 : log.printf(" length of annealing cycle : %u\n",nanneal_);
618 0 : log.printf(" annealing factor : %f\n",kanneal_);
619 : }
620 20 : if(ovstride_>0) {
621 0 : log.printf(" stride for writing model overlaps : %u\n",ovstride_);
622 0 : log.printf(" file for writing model overlaps : %s\n", ovfilename_.c_str());
623 : }
624 :
625 : // set constant quantity before calculating stuff
626 20 : cfact_ = 1.0/pow( 2.0*pi, 1.5 );
627 :
628 : // calculate model GMM constant parameters
629 20 : std::vector<double> GMM_m_w = get_GMM_m(atoms);
630 :
631 : // read data GMM parameters
632 20 : get_GMM_d(GMM_file);
633 20 : log.printf(" number of GMM components : %u\n", static_cast<unsigned>(GMM_d_m_.size()));
634 :
635 : // normalize atom weight map
636 40 : if(norm_d <= 0.0) norm_d = accumulate(GMM_d_w_.begin(), GMM_d_w_.end(), 0.0);
637 : double norm_m = accumulate(GMM_m_w.begin(), GMM_m_w.end(), 0.0);
638 : // renormalization
639 100 : for(unsigned i=0; i<GMM_m_w_.size(); ++i) GMM_m_w_[i] *= norm_d / norm_m;
640 :
641 : // read experimental errors
642 : std::vector<double> exp_err;
643 20 : if(errfile.size()>0) exp_err = read_exp_errors(errfile);
644 :
645 : // get self overlaps between data GMM components
646 20 : if(ovfile.size()>0) {
647 0 : ovdd_ = read_exp_overlaps(ovfile);
648 : } else {
649 1982 : for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
650 1962 : double ov = get_self_overlap(i);
651 1962 : ovdd_.push_back(ov);
652 : }
653 : }
654 :
655 20 : log.printf(" number of GMM groups : %u\n", static_cast<unsigned>(GMM_d_grps_.size()));
656 : // cycle on GMM groups
657 40 : for(unsigned Gid=0; Gid<GMM_d_grps_.size(); ++Gid) {
658 20 : log.printf(" group %d\n", Gid);
659 : // calculate median overlap and experimental error
660 : std::vector<double> ovdd;
661 : std::vector<double> err;
662 : // cycle on the group members
663 1982 : for(unsigned i=0; i<GMM_d_grps_[Gid].size(); ++i) {
664 : // GMM id
665 1962 : int GMMid = GMM_d_grps_[Gid][i];
666 : // add to experimental error
667 1962 : if(errfile.size()>0) err.push_back(exp_err[GMMid]);
668 1962 : else err.push_back(0.);
669 : // add to GMM overlap
670 1962 : ovdd.push_back(ovdd_[GMMid]);
671 : }
672 : // calculate median quantities
673 20 : double ovdd_m = get_median(ovdd);
674 20 : double err_m = get_median(err);
675 : // print out statistics
676 20 : log.printf(" # of members : %zu\n", GMM_d_grps_[Gid].size());
677 20 : log.printf(" median overlap : %lf\n", ovdd_m);
678 20 : log.printf(" median error : %lf\n", err_m);
679 : // add minimum value of sigma for this group of GMMs
680 20 : sigma_min_.push_back(std::sqrt(err_m*err_m+sigma_min*ovdd_m*sigma_min*ovdd_m));
681 : // these are only needed with Gaussian and Outliers noise models
682 20 : if(noise_!=2) {
683 : // set dsigma
684 14 : dsigma_.push_back(dsigma * ovdd_m);
685 : // set sigma max
686 14 : sigma_max_.push_back(10.0*ovdd_m + sigma_min_[Gid] + dsigma_[Gid]);
687 : // initialize sigma
688 28 : sigma_.push_back(std::max(sigma_min_[Gid],std::min(sigma_ini*ovdd_m,sigma_max_[Gid])));
689 : }
690 : }
691 :
692 : // read status file if restarting
693 20 : if(getRestart() && noise_!=2) read_status();
694 :
695 : // calculate auxiliary stuff
696 20 : calculate_useful_stuff(reso);
697 :
698 : // prepare data and derivative std::vectors
699 20 : ovmd_.resize(ovdd_.size());
700 20 : ovmd_ave_.resize(ovdd_.size());
701 20 : atom_der_.resize(GMM_m_type_.size());
702 20 : GMMid_der_.resize(ovdd_.size());
703 :
704 : // clear things that are no longer needed
705 : GMM_d_cov_.clear();
706 :
707 : // add components
708 40 : addComponentWithDerivatives("scoreb"); componentIsNotPeriodic("scoreb");
709 :
710 48 : if(noise_!=2) {addComponent("acc"); componentIsNotPeriodic("acc");}
711 :
712 20 : if(nregres_>0) {
713 0 : addComponent("scale"); componentIsNotPeriodic("scale");
714 0 : addComponent("accscale"); componentIsNotPeriodic("accscale");
715 0 : addComponent("enescale"); componentIsNotPeriodic("enescale");
716 : }
717 :
718 20 : if(nanneal_>0) {addComponent("anneal"); componentIsNotPeriodic("anneal");}
719 :
720 20 : if(do_reweight_) {
721 4 : addComponent("biasDer");
722 4 : componentIsNotPeriodic("biasDer");
723 4 : addComponent("weight");
724 4 : componentIsNotPeriodic("weight");
725 : }
726 :
727 40 : addComponent("neff");
728 20 : componentIsNotPeriodic("neff");
729 :
730 34 : for(unsigned i=0; i<sigma_.size(); ++i) {
731 14 : std::string num; Tools::convert(i, num);
732 28 : addComponent("sigma-"+num); componentIsNotPeriodic("sigma-"+num);
733 28 : getPntrToComponent("sigma-"+num)->set(sigma_[i]);
734 : }
735 :
736 : // initialize random seed
737 : unsigned iseed;
738 20 : if(rank_==0) iseed = time(NULL)+replica_;
739 6 : else iseed = 0;
740 20 : comm.Sum(&iseed, 1);
741 20 : random_.setSeed(-iseed);
742 :
743 : // request the atoms
744 20 : requestAtoms(atoms);
745 20 : setDerivatives();
746 :
747 : // print bibliography
748 40 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
749 40 : log<<plumed.cite("Hanot, Bonomi, Greenberg, Sali, Nilges, Vendruscolo, Pellarin, bioRxiv doi: 10.1101/113951 (2017)");
750 40 : log<<plumed.cite("Bonomi, Pellarin, Vendruscolo, Biophys. J. 114, 1604 (2018)");
751 22 : if(do_reweight_) log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
752 22 : if(!no_aver_ && nrep_>1)log<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
753 20 : log<<"\n";
754 20 : }
755 :
756 0 : void EMMI::write_model_overlap(long long int step)
757 : {
758 0 : OFile ovfile;
759 0 : ovfile.link(*this);
760 0 : std::string num; Tools::convert(step,num);
761 0 : std::string name = ovfilename_+"-"+num;
762 0 : ovfile.open(name);
763 : ovfile.setHeavyFlush();
764 0 : ovfile.fmtField("%10.7e ");
765 : // write overlaps
766 0 : for(int i=0; i<ovmd_.size(); ++i) {
767 0 : ovfile.printField("Model", ovmd_[i]);
768 0 : ovfile.printField("ModelScaled", scale_ * ovmd_[i]);
769 0 : ovfile.printField("Data", ovdd_[i]);
770 0 : ovfile.printField();
771 : }
772 0 : ovfile.close();
773 0 : }
774 :
775 40 : double EMMI::get_median(std::vector<double> &v)
776 : {
777 : // dimension of std::vector
778 40 : unsigned size = v.size();
779 : // in case of only one entry
780 40 : if (size==1) {
781 0 : return v[0];
782 : } else {
783 : // reorder std::vector
784 40 : sort(v.begin(), v.end());
785 : // odd or even?
786 40 : if (size%2==0) {
787 4 : return (v[size/2-1]+v[size/2])/2.0;
788 : } else {
789 36 : return v[size/2];
790 : }
791 : }
792 : }
793 :
794 0 : void EMMI::read_status()
795 : {
796 : double MDtime;
797 : // open file
798 : auto ifile = Tools::make_unique<IFile>();
799 0 : ifile->link(*this);
800 0 : if(ifile->FileExist(statusfilename_)) {
801 0 : ifile->open(statusfilename_);
802 0 : while(ifile->scanField("MD_time", MDtime)) {
803 0 : for(unsigned i=0; i<sigma_.size(); ++i) {
804 : // convert i to std::string
805 0 : std::string num; Tools::convert(i,num);
806 : // read entries
807 0 : ifile->scanField("s"+num, sigma_[i]);
808 : }
809 : // new line
810 0 : ifile->scanField();
811 : }
812 0 : ifile->close();
813 : } else {
814 0 : error("Cannot find status file "+statusfilename_+"\n");
815 : }
816 0 : }
817 :
818 10904 : void EMMI::print_status(long long int step)
819 : {
820 : // if first time open the file
821 10904 : if(first_status_) {
822 14 : first_status_ = false;
823 14 : statusfile_.link(*this);
824 14 : statusfile_.open(statusfilename_);
825 : statusfile_.setHeavyFlush();
826 28 : statusfile_.fmtField("%6.3e ");
827 : }
828 : // write fields
829 10904 : double MDtime = static_cast<double>(step)*getTimeStep();
830 10904 : statusfile_.printField("MD_time", MDtime);
831 21808 : for(unsigned i=0; i<sigma_.size(); ++i) {
832 : // convert i to std::string
833 10904 : std::string num; Tools::convert(i,num);
834 : // print entry
835 21808 : statusfile_.printField("s"+num, sigma_[i]);
836 : }
837 10904 : statusfile_.printField();
838 10904 : }
839 :
840 0 : bool EMMI::doAccept(double oldE, double newE, double kbt) {
841 : bool accept = false;
842 : // calculate delta energy
843 0 : double delta = ( newE - oldE ) / kbt;
844 : // if delta is negative always accept move
845 0 : if( delta < 0.0 ) {
846 : accept = true;
847 : } else {
848 : // otherwise extract random number
849 0 : double s = random_.RandU01();
850 0 : if( s < std::exp(-delta) ) { accept = true; }
851 : }
852 0 : return accept;
853 : }
854 :
855 0 : void EMMI::doMonteCarlo()
856 : {
857 : // extract random GMM group
858 0 : unsigned nGMM = static_cast<unsigned>(std::floor(random_.RandU01()*static_cast<double>(GMM_d_grps_.size())));
859 0 : if(nGMM==GMM_d_grps_.size()) nGMM -= 1;
860 :
861 : // generate random move
862 0 : double shift = dsigma_[nGMM] * ( 2.0 * random_.RandU01() - 1.0 );
863 : // new sigma
864 0 : double new_s = sigma_[nGMM] + shift;
865 : // check boundaries
866 0 : if(new_s > sigma_max_[nGMM]) {new_s = 2.0 * sigma_max_[nGMM] - new_s;}
867 0 : if(new_s < sigma_min_[nGMM]) {new_s = 2.0 * sigma_min_[nGMM] - new_s;}
868 : // old s2
869 0 : double old_inv_s2 = 1.0 / sigma_[nGMM] / sigma_[nGMM];
870 : // new s2
871 0 : double new_inv_s2 = 1.0 / new_s / new_s;
872 :
873 : // cycle on GMM group and calculate old and new energy
874 : double old_ene = 0.0;
875 : double new_ene = 0.0;
876 0 : double ng = static_cast<double>(GMM_d_grps_[nGMM].size());
877 :
878 : // in case of Gaussian noise
879 0 : if(noise_==0) {
880 : double chi2 = 0.0;
881 0 : for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
882 : // id GMM component
883 0 : int GMMid = GMM_d_grps_[nGMM][i];
884 : // deviation
885 0 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
886 : // add to chi2
887 0 : chi2 += dev * dev;
888 : }
889 : // final energy calculation: add normalization and prior
890 0 : old_ene = 0.5 * kbt_ * ( chi2 * old_inv_s2 - (ng+prior_) * std::log(old_inv_s2) );
891 0 : new_ene = 0.5 * kbt_ * ( chi2 * new_inv_s2 - (ng+prior_) * std::log(new_inv_s2) );
892 : }
893 :
894 : // in case of Outliers noise
895 0 : if(noise_==1) {
896 0 : for(unsigned i=0; i<GMM_d_grps_[nGMM].size(); ++i) {
897 : // id GMM component
898 0 : int GMMid = GMM_d_grps_[nGMM][i];
899 : // calculate deviation
900 0 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
901 : // add to energies
902 0 : old_ene += std::log1p( 0.5 * dev * dev * old_inv_s2);
903 0 : new_ene += std::log1p( 0.5 * dev * dev * new_inv_s2);
904 : }
905 : // final energy calculation: add normalization and prior
906 0 : old_ene = kbt_ * ( old_ene + (ng+prior_) * std::log(sigma_[nGMM]) );
907 0 : new_ene = kbt_ * ( new_ene + (ng+prior_) * std::log(new_s) );
908 : }
909 :
910 : // increment number of trials
911 0 : MCtrials_ += 1.0;
912 :
913 : // accept or reject
914 0 : bool accept = doAccept(old_ene/anneal_, new_ene/anneal_, kbt_);
915 0 : if(accept) {
916 0 : sigma_[nGMM] = new_s;
917 0 : MCaccept_ += 1.0;
918 : }
919 : // local communication
920 0 : if(rank_!=0) {
921 0 : for(unsigned i=0; i<sigma_.size(); ++i) sigma_[i] = 0.0;
922 0 : MCaccept_ = 0.0;
923 : }
924 0 : if(size_>1) {
925 0 : comm.Sum(&sigma_[0], sigma_.size());
926 0 : comm.Sum(&MCaccept_, 1);
927 : }
928 :
929 : // update sigma output
930 0 : std::string num; Tools::convert(nGMM, num);
931 0 : getPntrToComponent("sigma-"+num)->set(sigma_[nGMM]);
932 0 : }
933 :
934 0 : std::vector<double> EMMI::read_exp_errors(const std::string & errfile)
935 : {
936 : int nexp, idcomp;
937 : double err;
938 : std::vector<double> exp_err;
939 : // open file
940 0 : IFile *ifile = new IFile();
941 0 : if(ifile->FileExist(errfile)) {
942 0 : ifile->open(errfile);
943 : // scan for number of experimental errors
944 0 : ifile->scanField("Nexp", nexp);
945 : // cycle on GMM components
946 0 : while(ifile->scanField("Id",idcomp)) {
947 : // total experimental error
948 0 : double err_tot = 0.0;
949 : // cycle on number of experimental overlaps
950 0 : for(unsigned i=0; i<nexp; ++i) {
951 0 : std::string ss; Tools::convert(i,ss);
952 0 : ifile->scanField("Err"+ss, err);
953 : // add to total error
954 0 : err_tot += err*err;
955 : }
956 : // new line
957 0 : ifile->scanField();
958 : // calculate RMSE
959 0 : err_tot = std::sqrt(err_tot/static_cast<double>(nexp));
960 : // add to global
961 0 : exp_err.push_back(err_tot);
962 : }
963 0 : ifile->close();
964 : } else {
965 0 : error("Cannot find ERR_FILE "+errfile+"\n");
966 : }
967 0 : return exp_err;
968 : }
969 :
970 0 : std::vector<double> EMMI::read_exp_overlaps(const std::string & ovfile)
971 : {
972 : int idcomp;
973 : double ov;
974 : std::vector<double> ovdd;
975 : // open file
976 0 : IFile *ifile = new IFile();
977 0 : if(ifile->FileExist(ovfile)) {
978 0 : ifile->open(ovfile);
979 : // cycle on GMM components
980 0 : while(ifile->scanField("Id",idcomp)) {
981 : // read experimental overlap
982 0 : ifile->scanField("Overlap", ov);
983 : // add to ovdd
984 0 : ovdd.push_back(ov);
985 : // new line
986 0 : ifile->scanField();
987 : }
988 0 : ifile->close();
989 : } else {
990 0 : error("Cannot find OV_FILE "+ovfile+"\n");
991 : }
992 0 : return ovdd;
993 : }
994 :
995 20 : std::vector<double> EMMI::get_GMM_m(std::vector<AtomNumber> &atoms)
996 : {
997 : // list of weights - one per atom
998 : std::vector<double> GMM_m_w;
999 :
1000 20 : auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
1001 : // map of atom types to A and B coefficients of scattering factor
1002 : // f(s) = A * exp(-B*s**2)
1003 : // B is in Angstrom squared
1004 : // map between an atom type and an index
1005 : std::map<std::string, unsigned> type_map;
1006 20 : type_map["C"]=0;
1007 20 : type_map["O"]=1;
1008 20 : type_map["N"]=2;
1009 20 : type_map["S"]=3;
1010 : // fill in sigma std::vector
1011 20 : GMM_m_s_.push_back(0.01*15.146); // type 0
1012 20 : GMM_m_s_.push_back(0.01*8.59722); // type 1
1013 20 : GMM_m_s_.push_back(0.01*11.1116); // type 2
1014 20 : GMM_m_s_.push_back(0.01*15.8952); // type 3
1015 : // fill in weight std::vector
1016 20 : GMM_m_w_.push_back(2.49982); // type 0
1017 20 : GMM_m_w_.push_back(1.97692); // type 1
1018 20 : GMM_m_w_.push_back(2.20402); // type 2
1019 20 : GMM_m_w_.push_back(5.14099); // type 3
1020 :
1021 : // check if MOLINFO line is present
1022 20 : if( moldat ) {
1023 20 : log<<" MOLINFO DATA found with label " <<moldat->getLabel()<<", using proper atom names\n";
1024 10876 : for(unsigned i=0; i<atoms.size(); ++i) {
1025 : // get atom name
1026 10856 : std::string name = moldat->getAtomName(atoms[i]);
1027 : char type;
1028 : // get atom type
1029 10856 : char first = name.at(0);
1030 : // GOLDEN RULE: type is first letter, if not a number
1031 10856 : if (!isdigit(first)) {
1032 : type = first;
1033 : // otherwise is the second
1034 : } else {
1035 0 : type = name.at(1);
1036 : }
1037 : // check if key in map
1038 10856 : std::string type_s = std::string(1,type);
1039 10856 : if(type_map.find(type_s) != type_map.end()) {
1040 : // save atom type
1041 10856 : GMM_m_type_.push_back(type_map[type_s]);
1042 : // this will be normalized in the final density
1043 10856 : GMM_m_w.push_back(GMM_m_w_[type_map[type_s]]);
1044 : } else {
1045 0 : error("Wrong atom type "+type_s+" from atom name "+name+"\n");
1046 : }
1047 : }
1048 : } else {
1049 0 : error("MOLINFO DATA not found\n");
1050 : }
1051 20 : return GMM_m_w;
1052 : }
1053 :
1054 1962 : void EMMI::check_GMM_d(const VectorGeneric<6> &cov, double w)
1055 : {
1056 :
1057 : // check if positive defined, by calculating the 3 leading principal minors
1058 1962 : double pm1 = cov[0];
1059 1962 : double pm2 = cov[0]*cov[3]-cov[1]*cov[1];
1060 1962 : double pm3 = cov[0]*(cov[3]*cov[5]-cov[4]*cov[4])-cov[1]*(cov[1]*cov[5]-cov[4]*cov[2])+cov[2]*(cov[1]*cov[4]-cov[3]*cov[2]);
1061 : // apply Sylvester’s criterion
1062 1962 : if(pm1<=0.0 || pm2<=0.0 || pm3<=0.0)
1063 0 : error("check data GMM: covariance matrix is not positive defined");
1064 :
1065 : // check if weight is positive
1066 1962 : if(w<=0) error("check data GMM: weight must be positive");
1067 1962 : }
1068 :
1069 : // read GMM data file in PLUMED format:
1070 20 : void EMMI::get_GMM_d(const std::string & GMM_file)
1071 : {
1072 20 : VectorGeneric<6> cov;
1073 :
1074 : // open file
1075 : auto ifile=Tools::make_unique<IFile>();
1076 20 : if(ifile->FileExist(GMM_file)) {
1077 20 : ifile->open(GMM_file);
1078 : int idcomp;
1079 3964 : while(ifile->scanField("Id",idcomp)) {
1080 : int beta;
1081 : double w, m0, m1, m2;
1082 3924 : ifile->scanField("Weight",w);
1083 3924 : ifile->scanField("Mean_0",m0);
1084 3924 : ifile->scanField("Mean_1",m1);
1085 3924 : ifile->scanField("Mean_2",m2);
1086 3924 : ifile->scanField("Cov_00",cov[0]);
1087 3924 : ifile->scanField("Cov_01",cov[1]);
1088 3924 : ifile->scanField("Cov_02",cov[2]);
1089 3924 : ifile->scanField("Cov_11",cov[3]);
1090 3924 : ifile->scanField("Cov_12",cov[4]);
1091 3924 : ifile->scanField("Cov_22",cov[5]);
1092 1962 : ifile->scanField("Beta",beta);
1093 : // check input
1094 1962 : check_GMM_d(cov, w);
1095 : // check beta
1096 1962 : if(beta<0) error("Beta must be positive!");
1097 : // center of the Gaussian
1098 1962 : GMM_d_m_.push_back(Vector(m0,m1,m2));
1099 : // covariance matrix
1100 1962 : GMM_d_cov_.push_back(cov);
1101 : // weight
1102 1962 : GMM_d_w_.push_back(w);
1103 : // beta
1104 1962 : GMM_d_beta_.push_back(beta);
1105 : // new line
1106 1962 : ifile->scanField();
1107 : }
1108 : } else {
1109 0 : error("Cannot find GMM_FILE "+GMM_file+"\n");
1110 : }
1111 : // now create a set from beta (unique set of values)
1112 20 : std::set<int> bu(GMM_d_beta_.begin(), GMM_d_beta_.end());
1113 : // now prepare the group std::vector
1114 20 : GMM_d_grps_.resize(bu.size());
1115 : // and fill it in
1116 1982 : for(unsigned i=0; i<GMM_d_beta_.size(); ++i) {
1117 1962 : if(GMM_d_beta_[i]>=GMM_d_grps_.size()) error("Check Beta values");
1118 1962 : GMM_d_grps_[GMM_d_beta_[i]].push_back(i);
1119 : }
1120 20 : }
1121 :
1122 20 : void EMMI::calculate_useful_stuff(double reso)
1123 : {
1124 : // We use the following definition for resolution:
1125 : // the Fourier transform of the density distribution in real space
1126 : // f(s) falls to 1/e of its maximum value at wavenumber 1/resolution
1127 : // i.e. from f(s) = A * exp(-B*s**2) -> Res = std::sqrt(B).
1128 : // average value of B
1129 : double Bave = 0.0;
1130 10876 : for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
1131 10856 : Bave += GMM_m_s_[GMM_m_type_[i]];
1132 : }
1133 20 : Bave /= static_cast<double>(GMM_m_type_.size());
1134 : // calculate blur factor
1135 : double blur = 0.0;
1136 20 : if(reso*reso>Bave) blur = reso*reso-Bave;
1137 36 : else warning("PLUMED should not be used with maps at resolution better than 0.3 nm");
1138 : // add blur to B
1139 100 : for(unsigned i=0; i<GMM_m_s_.size(); ++i) GMM_m_s_[i] += blur;
1140 : // calculate average resolution
1141 : double ave_res = 0.0;
1142 10876 : for(unsigned i=0; i<GMM_m_type_.size(); ++i) {
1143 10856 : ave_res += std::sqrt(GMM_m_s_[GMM_m_type_[i]]);
1144 : }
1145 20 : ave_res = ave_res / static_cast<double>(GMM_m_type_.size());
1146 20 : log.printf(" experimental map resolution : %3.2f\n", reso);
1147 20 : log.printf(" predicted map resolution : %3.2f\n", ave_res);
1148 20 : log.printf(" blur factor : %f\n", blur);
1149 : // now calculate useful stuff
1150 20 : VectorGeneric<6> cov, sum, inv_sum;
1151 : // cycle on all atoms types (4 for the moment)
1152 100 : for(unsigned i=0; i<GMM_m_s_.size(); ++i) {
1153 : // the Gaussian in density (real) space is the FT of scattering factor
1154 : // f(r) = A * (pi/B)**1.5 * exp(-pi**2/B*r**2)
1155 80 : double s = std::sqrt ( 0.5 * GMM_m_s_[i] ) / pi;
1156 : // covariance matrix for spherical Gaussian
1157 80 : cov[0]=s*s; cov[1]=0.0; cov[2]=0.0;
1158 80 : cov[3]=s*s; cov[4]=0.0;
1159 80 : cov[5]=s*s;
1160 : // cycle on all data GMM
1161 7928 : for(unsigned j=0; j<GMM_d_m_.size(); ++j) {
1162 : // we need the sum of the covariance matrices
1163 54936 : for(unsigned k=0; k<6; ++k) sum[k] = cov[k] + GMM_d_cov_[j][k];
1164 : // and to calculate its determinant
1165 7848 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
1166 7848 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
1167 7848 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
1168 : // calculate prefactor - model weights are already normalized
1169 7848 : double pre_fact = cfact_ / std::sqrt(det) * GMM_d_w_[j] * GMM_m_w_[i];
1170 : // and its inverse
1171 7848 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
1172 7848 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
1173 7848 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
1174 7848 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
1175 7848 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
1176 7848 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
1177 : // now we store the prefactor
1178 7848 : pre_fact_.push_back(pre_fact);
1179 : // and the inverse of the sum
1180 7848 : inv_cov_md_.push_back(inv_sum);
1181 : }
1182 : }
1183 : // tabulate exponential
1184 20 : dexp_ = dpcutoff_ / static_cast<double> (nexp_-1);
1185 20000020 : for(unsigned i=0; i<nexp_; ++i) {
1186 20000000 : tab_exp_.push_back(std::exp(-static_cast<double>(i) * dexp_));
1187 : }
1188 20 : }
1189 :
1190 : // get prefactors
1191 1621458 : double EMMI::get_prefactor_inverse
1192 : (const VectorGeneric<6> &GMM_cov_0, const VectorGeneric<6> &GMM_cov_1,
1193 : double GMM_w_0, double GMM_w_1,
1194 : VectorGeneric<6> &sum, VectorGeneric<6> &inv_sum)
1195 : {
1196 : // we need the sum of the covariance matrices
1197 11350206 : for(unsigned k=0; k<6; ++k) sum[k] = GMM_cov_0[k] + GMM_cov_1[k];
1198 :
1199 : // and to calculate its determinant
1200 1621458 : double det = sum[0]*(sum[3]*sum[5]-sum[4]*sum[4]);
1201 1621458 : det -= sum[1]*(sum[1]*sum[5]-sum[4]*sum[2]);
1202 1621458 : det += sum[2]*(sum[1]*sum[4]-sum[3]*sum[2]);
1203 :
1204 : // the prefactor is
1205 1621458 : double pre_fact = cfact_ / std::sqrt(det) * GMM_w_0 * GMM_w_1;
1206 :
1207 : // and its inverse
1208 1621458 : inv_sum[0] = (sum[3]*sum[5] - sum[4]*sum[4])/det;
1209 1621458 : inv_sum[1] = (sum[2]*sum[4] - sum[1]*sum[5])/det;
1210 1621458 : inv_sum[2] = (sum[1]*sum[4] - sum[2]*sum[3])/det;
1211 1621458 : inv_sum[3] = (sum[0]*sum[5] - sum[2]*sum[2])/det;
1212 1621458 : inv_sum[4] = (sum[2]*sum[1] - sum[0]*sum[4])/det;
1213 1621458 : inv_sum[5] = (sum[0]*sum[3] - sum[1]*sum[1])/det;
1214 :
1215 : // return pre-factor
1216 1621458 : return pre_fact;
1217 : }
1218 :
1219 1962 : double EMMI::get_self_overlap(unsigned id)
1220 : {
1221 : double ov_tot = 0.0;
1222 1962 : VectorGeneric<6> sum, inv_sum;
1223 1962 : Vector ov_der;
1224 : // start loop
1225 1623420 : for(unsigned i=0; i<GMM_d_m_.size(); ++i) {
1226 : // call auxiliary method
1227 1621458 : double pre_fact = get_prefactor_inverse(GMM_d_cov_[id], GMM_d_cov_[i],
1228 1621458 : GMM_d_w_[id], GMM_d_w_[i], sum, inv_sum);
1229 : // add overlap to ov_tot
1230 1621458 : ov_tot += get_overlap(GMM_d_m_[id], GMM_d_m_[i], pre_fact, inv_sum, ov_der);
1231 : }
1232 : // and return it
1233 1962 : return ov_tot;
1234 : }
1235 :
1236 : // get overlap and derivatives
1237 3732816 : double EMMI::get_overlap(const Vector &m_m, const Vector &d_m, double pre_fact,
1238 : const VectorGeneric<6> &inv_cov_md, Vector &ov_der)
1239 : {
1240 3732816 : Vector md;
1241 : // calculate std::vector difference m_m-d_m with/without pbc
1242 3732816 : if(pbc_) md = pbcDistance(d_m, m_m);
1243 3732816 : else md = delta(d_m, m_m);
1244 : // calculate product of transpose of md and inv_cov_md
1245 3732816 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
1246 3732816 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
1247 3732816 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
1248 : // calculate product of prod and md
1249 3732816 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
1250 : // final calculation
1251 3732816 : ov = pre_fact * std::exp(-0.5*ov);
1252 : // derivatives
1253 3732816 : ov_der = ov * Vector(p_x, p_y, p_z);
1254 3732816 : return ov;
1255 : }
1256 :
1257 : // get the exponent of the overlap
1258 59085036 : double EMMI::get_exp_overlap(const Vector &m_m, const Vector &d_m,
1259 : const VectorGeneric<6> &inv_cov_md)
1260 : {
1261 59085036 : Vector md;
1262 : // calculate std::vector difference m_m-d_m with/without pbc
1263 59085036 : if(pbc_) md = pbcDistance(d_m, m_m);
1264 59085036 : else md = delta(d_m, m_m);
1265 : // calculate product of transpose of md and inv_cov_md
1266 59085036 : double p_x = md[0]*inv_cov_md[0]+md[1]*inv_cov_md[1]+md[2]*inv_cov_md[2];
1267 59085036 : double p_y = md[0]*inv_cov_md[1]+md[1]*inv_cov_md[3]+md[2]*inv_cov_md[4];
1268 59085036 : double p_z = md[0]*inv_cov_md[2]+md[1]*inv_cov_md[4]+md[2]*inv_cov_md[5];
1269 : // calculate product of prod and md
1270 59085036 : double ov = md[0]*p_x+md[1]*p_y+md[2]*p_z;
1271 59085036 : return ov;
1272 : }
1273 :
1274 16355 : void EMMI::update_neighbor_list()
1275 : {
1276 : // dimension of GMM and atom std::vectors
1277 16355 : unsigned GMM_d_size = GMM_d_m_.size();
1278 16355 : unsigned GMM_m_size = GMM_m_type_.size();
1279 : // local neighbor list
1280 : std::vector < unsigned > nl_l;
1281 : // clear old neighbor list
1282 16355 : nl_.clear();
1283 :
1284 : // cycle on GMM components - in parallel
1285 116273 : for(unsigned id=rank_; id<GMM_d_size; id+=size_) {
1286 : // overlap lists and map
1287 : std::vector<double> ov_l;
1288 : std::map<double, unsigned> ov_m;
1289 : // total overlap with id
1290 : double ov_tot = 0.0;
1291 : // cycle on all atoms
1292 59184954 : for(unsigned im=0; im<GMM_m_size; ++im) {
1293 : // get index in auxiliary lists
1294 59085036 : unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
1295 : // calculate exponent of overlap
1296 59085036 : double expov = get_exp_overlap(GMM_d_m_[id], getPosition(im), inv_cov_md_[kaux]);
1297 : // get index of 0.5*expov in tabulated exponential
1298 59085036 : unsigned itab = static_cast<unsigned> (round( 0.5*expov/dexp_ ));
1299 : // check boundaries and skip atom in case
1300 59085036 : if(itab >= tab_exp_.size()) continue;
1301 : // in case calculate overlap
1302 4133388 : double ov = pre_fact_[kaux] * tab_exp_[itab];
1303 : // add to list
1304 4133388 : ov_l.push_back(ov);
1305 : // and map to retrieve atom index
1306 4133388 : ov_m[ov] = im;
1307 : // increase ov_tot
1308 4133388 : ov_tot += ov;
1309 : }
1310 : // check if zero size -> ov_tot = 0
1311 99918 : if(ov_l.size()==0) continue;
1312 : // define cutoff
1313 99842 : double ov_cut = ov_tot * nl_cutoff_;
1314 : // sort ov_l in ascending order
1315 99842 : std::sort(ov_l.begin(), ov_l.end());
1316 : // integrate ov_l
1317 : double res = 0.0;
1318 2146102 : for(unsigned i=0; i<ov_l.size(); ++i) {
1319 2146102 : res += ov_l[i];
1320 : // if exceeding the cutoff for overlap, stop
1321 2146102 : if(res >= ov_cut) break;
1322 : else ov_m.erase(ov_l[i]);
1323 : }
1324 : // now add atoms to neighborlist
1325 2186970 : for(std::map<double, unsigned>::iterator it=ov_m.begin(); it!=ov_m.end(); ++it)
1326 2087128 : nl_l.push_back(id*GMM_m_size+it->second);
1327 : // end cycle on GMM components in parallel
1328 : }
1329 : // find total dimension of neighborlist
1330 16355 : std::vector <int> recvcounts(size_, 0);
1331 16355 : recvcounts[rank_] = nl_l.size();
1332 16355 : comm.Sum(&recvcounts[0], size_);
1333 : int tot_size = accumulate(recvcounts.begin(), recvcounts.end(), 0);
1334 : // resize neighbor stuff
1335 16355 : nl_.resize(tot_size);
1336 : // calculate std::vector of displacement
1337 16355 : std::vector<int> disp(size_);
1338 16355 : disp[0] = 0;
1339 : int rank_size = 0;
1340 27257 : for(unsigned i=0; i<size_-1; ++i) {
1341 10902 : rank_size += recvcounts[i];
1342 10902 : disp[i+1] = rank_size;
1343 : }
1344 : // Allgather neighbor list
1345 16355 : comm.Allgatherv(&nl_l[0], recvcounts[rank_], &nl_[0], &recvcounts[0], &disp[0]);
1346 : // now resize derivatives
1347 16355 : ovmd_der_.resize(tot_size);
1348 16355 : }
1349 :
1350 30 : void EMMI::prepare()
1351 : {
1352 30 : if(getExchangeStep()) first_time_=true;
1353 30 : }
1354 :
1355 : // overlap calculator
1356 16365 : void EMMI::calculate_overlap() {
1357 :
1358 16365 : if(first_time_ || getExchangeStep() || getStep()%nl_stride_==0) {
1359 16355 : update_neighbor_list();
1360 16355 : first_time_=false;
1361 : }
1362 :
1363 : // clean temporary std::vectors
1364 174342 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_[i] = 0.0;
1365 3168864 : for(unsigned i=0; i<ovmd_der_.size(); ++i) ovmd_der_[i] = Vector(0,0,0);
1366 :
1367 : // we have to cycle over all model and data GMM components in the neighbor list
1368 16365 : unsigned GMM_d_size = GMM_d_m_.size();
1369 16365 : unsigned GMM_m_size = GMM_m_type_.size();
1370 2127723 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
1371 : // get data (id) and atom (im) indexes
1372 2111358 : unsigned id = nl_[i] / GMM_m_size;
1373 2111358 : unsigned im = nl_[i] % GMM_m_size;
1374 : // get index in auxiliary lists
1375 2111358 : unsigned kaux = GMM_m_type_[im] * GMM_d_size + id;
1376 : // add overlap with im component of model GMM
1377 2111358 : ovmd_[id] += get_overlap(GMM_d_m_[id], getPosition(im), pre_fact_[kaux],
1378 2111358 : inv_cov_md_[kaux], ovmd_der_[i]);
1379 : }
1380 : // communicate stuff
1381 16365 : if(size_>1) {
1382 10902 : comm.Sum(&ovmd_[0], ovmd_.size());
1383 10902 : comm.Sum(&ovmd_der_[0][0], 3*ovmd_der_.size());
1384 : }
1385 16365 : }
1386 :
1387 0 : double EMMI::scaleEnergy(double s)
1388 : {
1389 : double ene = 0.0;
1390 0 : for(unsigned i=0; i<ovdd_.size(); ++i) {
1391 0 : ene += std::log( abs ( s * ovmd_ave_[i] - ovdd_[i] ) );
1392 : }
1393 0 : return ene;
1394 : }
1395 :
1396 0 : double EMMI::doRegression()
1397 : {
1398 : // standard MC parameters
1399 : unsigned MCsteps = 100000;
1400 : double kbtmin = 1.0;
1401 : double kbtmax = 10.0;
1402 : unsigned ncold = 5000;
1403 : unsigned nhot = 2000;
1404 : double MCacc = 0.0;
1405 : double kbt, ebest, scale_best;
1406 :
1407 : // initial value of scale factor and energy
1408 0 : double scale = random_.RandU01() * ( scale_max_ - scale_min_ ) + scale_min_;
1409 0 : double ene = scaleEnergy(scale);
1410 : // set best energy
1411 0 : ebest = ene;
1412 :
1413 : // MC loop
1414 0 : for(unsigned istep=0; istep<MCsteps; ++istep) {
1415 : // get temperature
1416 0 : if(istep%(ncold+nhot)<ncold) kbt = kbtmin;
1417 : else kbt = kbtmax;
1418 : // propose move in scale
1419 0 : double ds = dscale_ * ( 2.0 * random_.RandU01() - 1.0 );
1420 0 : double new_scale = scale + ds;
1421 : // check boundaries
1422 0 : if(new_scale > scale_max_) {new_scale = 2.0 * scale_max_ - new_scale;}
1423 0 : if(new_scale < scale_min_) {new_scale = 2.0 * scale_min_ - new_scale;}
1424 : // new energy
1425 0 : double new_ene = scaleEnergy(new_scale);
1426 : // accept or reject
1427 0 : bool accept = doAccept(ene, new_ene, kbt);
1428 : // in case of acceptance
1429 0 : if(accept) {
1430 : scale = new_scale;
1431 : ene = new_ene;
1432 0 : MCacc += 1.0;
1433 : }
1434 : // save best
1435 0 : if(ene<ebest) {
1436 0 : ebest = ene;
1437 0 : scale_best = scale;
1438 : }
1439 : }
1440 : // calculate acceptance
1441 0 : double accscale = MCacc / static_cast<double>(MCsteps);
1442 : // global communication
1443 0 : if(!no_aver_ && nrep_>1) {
1444 0 : if(replica_!=0) {
1445 0 : scale_best = 0.0;
1446 0 : ebest = 0.0;
1447 0 : accscale = 0.0;
1448 : }
1449 0 : if(rank_==0) {
1450 0 : multi_sim_comm.Sum(&scale_best, 1);
1451 0 : multi_sim_comm.Sum(&ebest, 1);
1452 0 : multi_sim_comm.Sum(&accscale, 1);
1453 : }
1454 : }
1455 : // local communication
1456 0 : if(rank_!=0) {
1457 0 : scale_best = 0.0;
1458 0 : ebest = 0.0;
1459 0 : accscale = 0.0;
1460 : }
1461 0 : if(size_>1) {
1462 0 : comm.Sum(&scale_best, 1);
1463 0 : comm.Sum(&ebest, 1);
1464 0 : comm.Sum(&accscale, 1);
1465 : }
1466 : // set scale parameters
1467 0 : getPntrToComponent("accscale")->set(accscale);
1468 0 : getPntrToComponent("enescale")->set(ebest);
1469 : // return scale value
1470 0 : return scale_best;
1471 : }
1472 :
1473 0 : double EMMI::get_annealing(long long int step)
1474 : {
1475 : // default no annealing
1476 : double fact = 1.0;
1477 : // position in annealing cycle
1478 0 : unsigned nc = step%(4*nanneal_);
1479 : // useful doubles
1480 0 : double ncd = static_cast<double>(nc);
1481 0 : double nn = static_cast<double>(nanneal_);
1482 : // set fact
1483 0 : if(nc>=nanneal_ && nc<2*nanneal_) fact = (kanneal_-1.0) / nn * ( ncd - nn ) + 1.0;
1484 0 : if(nc>=2*nanneal_ && nc<3*nanneal_) fact = kanneal_;
1485 0 : if(nc>=3*nanneal_) fact = (1.0-kanneal_) / nn * ( ncd - 3.0*nn) + kanneal_;
1486 0 : return fact;
1487 : }
1488 :
1489 16365 : void EMMI::get_weights(double &weight, double &norm, double &neff)
1490 : {
1491 16365 : const double dnrep = static_cast<double>(nrep_);
1492 : // calculate the weights either from BIAS
1493 16365 : if(do_reweight_) {
1494 12 : std::vector<double> bias(nrep_,0);
1495 12 : if(rank_==0) {
1496 12 : bias[replica_] = getArgument(0);
1497 12 : if(nrep_>1) multi_sim_comm.Sum(&bias[0], nrep_);
1498 : }
1499 12 : comm.Sum(&bias[0], nrep_);
1500 :
1501 : // accumulate weights
1502 12 : if(!first_time_w_) {
1503 30 : for(unsigned i=0; i<nrep_; ++i) {
1504 20 : const double delta=bias[i]-average_weights_[i];
1505 : // FIXME: multiplying by fractional decay here causes problems with numerical derivatives,
1506 : // probably because we're making several calls to calculate(), causing accumulation
1507 : // of epsilons. Maybe we can work on a temporary copy of the action instead?
1508 20 : average_weights_[i]+=decay_w_*delta;
1509 : }
1510 : } else {
1511 2 : first_time_w_ = false;
1512 6 : for(unsigned i=0; i<nrep_; ++i) average_weights_[i] = bias[i];
1513 : }
1514 :
1515 : // set average back into bias and set norm to one
1516 12 : const double maxbias = *(std::max_element(average_weights_.begin(), average_weights_.end()));
1517 36 : for(unsigned i=0; i<nrep_; ++i) bias[i] = std::exp((average_weights_[i]-maxbias)/kbt_);
1518 : // set local weight, norm and weight variance
1519 12 : weight = bias[replica_];
1520 : double w2=0.;
1521 36 : for(unsigned i=0; i<nrep_; ++i) {
1522 24 : w2 += bias[i]*bias[i];
1523 24 : norm += bias[i];
1524 : }
1525 12 : neff = norm*norm/w2;
1526 24 : getPntrToComponent("weight")->set(weight/norm);
1527 : } else {
1528 : // or arithmetic ones
1529 16353 : neff = dnrep;
1530 16353 : weight = 1.0;
1531 16353 : norm = dnrep;
1532 : }
1533 16365 : getPntrToComponent("neff")->set(neff);
1534 16365 : }
1535 :
1536 16365 : void EMMI::calculate()
1537 : {
1538 :
1539 : // calculate CV
1540 16365 : calculate_overlap();
1541 :
1542 : // rescale factor for ensemble average
1543 16365 : double weight = 0.;
1544 16365 : double neff = 0.;
1545 16365 : double norm = 0.;
1546 16365 : get_weights(weight, norm, neff);
1547 :
1548 : // in case of ensemble averaging, calculate average overlap
1549 16365 : if(!no_aver_ && nrep_>1) {
1550 : // if master node, calculate average across replicas
1551 12 : if(rank_==0) {
1552 10812 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] = weight / norm * ovmd_[i];
1553 12 : multi_sim_comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
1554 : } else {
1555 0 : for(unsigned i=0; i<ovmd_ave_.size(); ++i) ovmd_ave_[i] = 0.0;
1556 : }
1557 : // local communication
1558 12 : if(size_>1) comm.Sum(&ovmd_ave_[0], ovmd_ave_.size());
1559 : } else {
1560 163530 : for(unsigned i=0; i<ovmd_.size(); ++i) ovmd_ave_[i] = ovmd_[i];
1561 : }
1562 :
1563 : // get time step
1564 16365 : long long int step = getStep();
1565 :
1566 : // do regression
1567 16365 : if(nregres_>0) {
1568 0 : if(step%nregres_==0 && !getExchangeStep()) scale_ = doRegression();
1569 : // set scale component
1570 0 : getPntrToComponent("scale")->set(scale_);
1571 : }
1572 :
1573 : // write model overlap to file
1574 16365 : if(ovstride_>0 && step%ovstride_==0) write_model_overlap(step);
1575 :
1576 : // clear energy and virial
1577 16365 : ene_ = 0.0;
1578 16365 : virial_.zero();
1579 :
1580 : // Gaussian noise
1581 16365 : if(noise_==0) calculate_Gauss();
1582 :
1583 : // Outliers noise
1584 16365 : if(noise_==1) calculate_Outliers();
1585 :
1586 : // Marginal noise
1587 16365 : if(noise_==2) calculate_Marginal();
1588 :
1589 : // get annealing rescale factor
1590 16365 : if(nanneal_>0) {
1591 0 : anneal_ = get_annealing(step);
1592 0 : getPntrToComponent("anneal")->set(anneal_);
1593 : }
1594 :
1595 : // annealing rescale
1596 16365 : ene_ /= anneal_;
1597 :
1598 16365 : std::vector<double> GMMid_der_av_(GMMid_der_.size(), 0.0);
1599 : // in case of ensemble averaging
1600 16365 : if(!no_aver_ && nrep_>1) {
1601 : // if master node, sum der_GMMid derivatives and ene
1602 12 : if(rank_==0) {
1603 10812 : for(unsigned i=0; i<GMMid_der_.size(); ++i) {
1604 10800 : GMMid_der_av_[i] = (weight / norm) * GMMid_der_[i];
1605 : }
1606 12 : multi_sim_comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
1607 12 : multi_sim_comm.Sum(&ene_, 1);
1608 : } else {
1609 : // set der_GMMid derivatives and energy to zero
1610 0 : for(unsigned i=0; i<GMMid_der_av_.size(); ++i) GMMid_der_av_[i]=0.0;
1611 0 : ene_ = 0.0;
1612 : }
1613 : // local communication
1614 12 : if(size_>1) {
1615 0 : comm.Sum(&GMMid_der_av_[0], GMMid_der_av_.size());
1616 0 : comm.Sum(&ene_, 1);
1617 : }
1618 : } else {
1619 163530 : for (unsigned i = 0; i < GMMid_der_.size(); ++i) {
1620 147177 : GMMid_der_av_[i] = GMMid_der_[i];
1621 : }
1622 : }
1623 :
1624 : // clean temporary std::vector
1625 9860991 : for(unsigned i=0; i<atom_der_.size(); ++i) atom_der_[i] = Vector(0,0,0);
1626 :
1627 : // get derivatives of bias with respect to atoms
1628 2127723 : for(unsigned i=rank_; i<nl_.size(); i=i+size_) {
1629 : // get indexes of data and model component
1630 2111358 : unsigned id = nl_[i] / GMM_m_type_.size();
1631 2111358 : unsigned im = nl_[i] % GMM_m_type_.size();
1632 : // chain rule + replica normalization
1633 2111358 : Vector tot_der = GMMid_der_av_[id] * ovmd_der_[i] * scale_ / anneal_;
1634 2111358 : Vector pos;
1635 2111358 : if(pbc_) pos = pbcDistance(GMM_d_m_[id], getPosition(im)) + GMM_d_m_[id];
1636 2111358 : else pos = getPosition(im);
1637 : // increment derivatives and virial
1638 2111358 : atom_der_[im] += tot_der;
1639 2111358 : virial_ += Tensor(pos, -tot_der);
1640 : }
1641 :
1642 : // communicate local derivatives and virial
1643 16365 : if(size_>1) {
1644 10902 : comm.Sum(&atom_der_[0][0], 3*atom_der_.size());
1645 10902 : comm.Sum(virial_);
1646 : }
1647 :
1648 : // set derivatives, virial, and score
1649 9860991 : for(unsigned i=0; i<atom_der_.size(); ++i) setAtomsDerivatives(getPntrToComponent("scoreb"), i, atom_der_[i]);
1650 16365 : setBoxDerivatives(getPntrToComponent("scoreb"), virial_);
1651 16365 : getPntrToComponent("scoreb")->set(ene_);
1652 :
1653 16365 : if (do_reweight_) {
1654 : double w_tmp = 0.;
1655 10812 : for (unsigned i = 0; i < ovmd_.size(); ++i) {
1656 10800 : w_tmp += (ovmd_[i] - ovmd_ave_[i]) * GMMid_der_[i];
1657 : }
1658 12 : w_tmp *= scale_ * (weight / norm) / kbt_ * decay_w_;
1659 :
1660 12 : setArgDerivatives(getPntrToComponent("scoreb"), w_tmp);
1661 24 : getPntrToComponent("biasDer")->set(w_tmp);
1662 : }
1663 :
1664 : // This part is needed only for Gaussian and Outliers noise models
1665 16365 : if(noise_!=2) {
1666 :
1667 : // do Montecarlo
1668 10914 : if(dsigma_[0]>0 && step%MCstride_==0 && !getExchangeStep()) doMonteCarlo();
1669 :
1670 : // print status
1671 10914 : if(step%statusstride_==0) print_status(step);
1672 :
1673 : // calculate acceptance ratio
1674 10914 : double acc = MCaccept_ / MCtrials_;
1675 :
1676 : // set value
1677 21828 : getPntrToComponent("acc")->set(acc);
1678 :
1679 : }
1680 :
1681 16365 : }
1682 :
1683 5463 : void EMMI::calculate_Gauss()
1684 : {
1685 : // cycle on all the GMM groups
1686 10926 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1687 : double eneg = 0.0;
1688 : // cycle on all the members of the group
1689 65322 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1690 : // id of the GMM component
1691 59859 : int GMMid = GMM_d_grps_[i][j];
1692 : // calculate deviation
1693 59859 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
1694 : // add to group energy
1695 59859 : eneg += 0.5 * dev * dev;
1696 : // store derivative for later
1697 59859 : GMMid_der_[GMMid] = kbt_ * dev / sigma_[i];
1698 : }
1699 : // add to total energy along with normalizations and prior
1700 5463 : ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
1701 : }
1702 5463 : }
1703 :
1704 5451 : void EMMI::calculate_Outliers()
1705 : {
1706 : // cycle on all the GMM groups
1707 10902 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1708 : // cycle on all the members of the group
1709 : double eneg = 0.0;
1710 54510 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1711 : // id of the GMM component
1712 49059 : int GMMid = GMM_d_grps_[i][j];
1713 : // calculate deviation
1714 49059 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] ) / sigma_[i];
1715 : // add to group energy
1716 49059 : eneg += std::log1p( 0.5 * dev * dev );
1717 : // store derivative for later
1718 49059 : GMMid_der_[GMMid] = kbt_ / ( 1.0 + 0.5 * dev * dev ) * dev / sigma_[i];
1719 : }
1720 : // add to total energy along with normalizations and prior
1721 5451 : ene_ += kbt_ * ( eneg + (static_cast<double>(GMM_d_grps_[i].size())+prior_) * std::log(sigma_[i]) );
1722 : }
1723 5451 : }
1724 :
1725 5451 : void EMMI::calculate_Marginal()
1726 : {
1727 : // cycle on all the GMM groups
1728 10902 : for(unsigned i=0; i<GMM_d_grps_.size(); ++i) {
1729 : // cycle on all the members of the group
1730 54510 : for(unsigned j=0; j<GMM_d_grps_[i].size(); ++j) {
1731 : // id of the GMM component
1732 49059 : int GMMid = GMM_d_grps_[i][j];
1733 : // calculate deviation
1734 49059 : double dev = ( scale_*ovmd_ave_[GMMid]-ovdd_[GMMid] );
1735 : // calculate errf
1736 49059 : double errf = erf ( dev * inv_sqrt2_ / sigma_min_[i] );
1737 : // add to group energy
1738 49059 : ene_ += -kbt_ * std::log ( 0.5 / dev * errf ) ;
1739 : // store derivative for later
1740 49059 : GMMid_der_[GMMid] = - kbt_/errf*sqrt2_pi_*std::exp(-0.5*dev*dev/sigma_min_[i]/sigma_min_[i])/sigma_min_[i]+kbt_/dev;
1741 : }
1742 : }
1743 5451 : }
1744 :
1745 : }
1746 : }
|