Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 :
23 : #include "bias/Bias.h"
24 : #include "bias/ActionRegister.h"
25 : #include "core/PlumedMain.h"
26 : #include "core/Atoms.h"
27 : #include "core/Value.h"
28 : #include "tools/File.h"
29 : #include "tools/OpenMP.h"
30 : #include "tools/Random.h"
31 : #include <chrono>
32 : #include <numeric>
33 :
34 : #ifndef M_PI
35 : #define M_PI 3.14159265358979323846
36 : #endif
37 :
38 : namespace PLMD {
39 : namespace isdb {
40 :
41 : //+PLUMEDOC ISDB_BIAS METAINFERENCE
42 : /*
43 : Calculates the Metainference energy for a set of experimental data.
44 :
45 : Metainference \cite Bonomi:2016ip is a Bayesian framework
46 : to model heterogeneous systems by integrating prior information with noisy, ensemble-averaged data.
47 : Metainference models a system and quantifies the level of noise in the data by considering a set of replicas of the system.
48 :
49 : Calculated experimental data are given in input as ARG while reference experimental values
50 : can be given either from fixed components of other actions using PARARG or as numbers using
51 : PARAMETERS. The default behavior is to average the data over the available replicas;
52 : if this is not wanted, the keyword NOENSEMBLE prevents this averaging.
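 :
 : For example, a minimal sketch using PARARG (the CONSTANT action, its NODERIV flag
 : and the component wildcard are assumptions to be checked against your PLUMED
 : version; labels and values are illustrative):
 :
 : \plumedfile
 : #SETTINGS NREPLICAS=2
 : d1: DISTANCE ATOMS=1,2
 : d2: DISTANCE ATOMS=3,4
 : ref: CONSTANT VALUES=0.5,0.6 NODERIV
 : mi: METAINFERENCE ARG=d1,d2 PARARG=ref.* SIGMA0=0.1 SIGMA_MIN=0.0001 SIGMA_MAX=1. DSIGMA=0.01 SIGMA_MEAN0=0.01
 : \endplumedfile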
53 :
54 : Metadynamics Metainference \cite Bonomi:2016ge, or more generally biased Metainference, requires knowledge of the
55 : biasing potential in order to calculate the weighted average. In this case the value of the bias
56 : must be provided as the last argument in ARG together with the keyword REWEIGHT. To avoid the noise
57 : resulting from the instantaneous value of the bias, the weight of each replica can be averaged
58 : over a given time using the keyword AVERAGING.
59 :
60 : The data can be averaged over multiple replicas and weighted by a bias if present.
61 : The functional form of Metainference can be chosen among five variants selected
62 : with NOISETYPE=GAUSS,MGAUSS,OUTLIERS,MOUTLIERS,GENERIC, which correspond to modelling the noise for
63 : the arguments as a single gaussian common to all the data points, a gaussian per data
64 : point, a single long-tailed gaussian common to all the data points, a long-tailed
65 : gaussian per data point, or two distinct noises as in the most general formulation of Metainference.
66 : In this latter case the noise of the replica-averaging is gaussian (one per data point) and the noise for
67 : the comparison with the experimental data can be chosen using the keyword LIKELIHOOD
68 : between gaussian and log-normal (one per data point); furthermore, the evolution of the estimated average
69 : over an infinite number of replicas is driven by DFTILDE.
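 :
 : For example, a minimal sketch of the GENERIC variant with a log-normal likelihood
 : (all labels and numerical values below are illustrative, not recommendations):
 :
 : \plumedfile
 : #SETTINGS NREPLICAS=2
 : d1: DISTANCE ATOMS=1,2
 : METAINFERENCE ...
 : ARG=d1
 : NOISETYPE=GENERIC LIKELIHOOD=LOGN DFTILDE=0.1
 : PARAMETERS=0.5
 : SIGMA0=0.1 SIGMA_MIN=0.0001 SIGMA_MAX=1. DSIGMA=0.01
 : SIGMA_MEAN0=0.01
 : LABEL=gen
 : ... METAINFERENCE
 : \endplumedfile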
70 :
71 : Following Metainference theory, there are two sigma values: SIGMA_MEAN0 represents the
72 : error of calculating an average quantity using a finite set of replicas and should
73 : be set as small as possible, following the guidelines for replica-averaged simulations
74 : in the framework of the Maximum Entropy Principle. Alternatively, it can be obtained
75 : automatically using the internal sigma-mean optimization introduced in \cite Lohr:2017gc
76 : (OPTSIGMAMEAN=SEM); in this second case sigma_mean is estimated from the maximum standard error
77 : of the mean, either over the whole simulation or over a defined time set with the keyword AVERAGING.
78 : SIGMA_BIAS is an uncertainty parameter, sampled by a MC algorithm in the bounded interval
79 : defined by SIGMA_MIN and SIGMA_MAX. The initial value is set with SIGMA0. The MC move is a
80 : random displacement of maximum value equal to DSIGMA. If the number of data points is
81 : too large and the acceptance rate drops, it is possible to make the MC move over mutually
82 : exclusive, random subsets of size MC_CHUNKSIZE and to run more than one move, setting MC_STEPS
83 : so that MC_CHUNKSIZE*MC_STEPS covers all the data points.
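 :
 : For example, a minimal sketch (illustrative values) in which each MC cycle moves the
 : two uncertainty parameters in mutually exclusive chunks of one, so that
 : MC_CHUNKSIZE*MC_STEPS=1*2 covers both data points:
 :
 : \plumedfile
 : #SETTINGS NREPLICAS=2
 : d1: DISTANCE ATOMS=1,2
 : d2: DISTANCE ATOMS=3,4
 : METAINFERENCE ...
 : ARG=d1,d2
 : NOISETYPE=MGAUSS
 : PARAMETERS=0.5,0.6
 : SIGMA0=0.1 SIGMA_MIN=0.0001 SIGMA_MAX=1. DSIGMA=0.01
 : SIGMA_MEAN0=0.01
 : MC_CHUNKSIZE=1 MC_STEPS=2
 : LABEL=mi
 : ... METAINFERENCE
 : \endplumedfile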
84 :
85 : Calculated and experimental data can be compared modulo a scaling factor and/or an offset
86 : using SCALEDATA and/or ADDOFFSET; the sampling is performed by a MC algorithm using either
87 : a flat or a gaussian prior, selected with SCALE_PRIOR or OFFSET_PRIOR.
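 :
 : For example, a minimal sketch of offset sampling with a gaussian prior (illustrative
 : labels and values):
 :
 : \plumedfile
 : #SETTINGS NREPLICAS=2
 : d1: DISTANCE ATOMS=1,2
 : METAINFERENCE ...
 : ARG=d1
 : NOISETYPE=OUTLIERS
 : PARAMETERS=0.5
 : ADDOFFSET OFFSET0=0.0 OFFSET_PRIOR=GAUSSIAN DOFFSET=0.01
 : SIGMA0=0.1 SIGMA_MIN=0.0001 SIGMA_MAX=1. DSIGMA=0.01
 : SIGMA_MEAN0=0.01
 : LABEL=mi
 : ... METAINFERENCE
 : \endplumedfile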
88 :
89 : \par Examples
90 :
91 : In the following example we calculate a set of \ref RDC, take their replica average and
92 : compare it with a set of experimental values. The RDCs are compared with
93 : the experimental data modulo a multiplication factor SCALE that is also sampled by
94 : MC on the fly.
95 :
96 : \plumedfile
97 : RDC ...
98 : LABEL=rdc
99 : SCALE=0.0001
100 : GYROM=-72.5388
101 : ATOMS1=22,23
102 : ATOMS2=25,27
103 : ATOMS3=29,31
104 : ATOMS4=33,34
105 : ... RDC
106 :
107 : METAINFERENCE ...
108 : ARG=rdc.*
109 : NOISETYPE=MGAUSS
110 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
111 : SCALEDATA SCALE0=1 SCALE_MIN=0.1 SCALE_MAX=3 DSCALE=0.01
112 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
113 : SIGMA_MEAN0=0.001
114 : LABEL=spe
115 : ... METAINFERENCE
116 :
117 : PRINT ARG=spe.bias FILE=BIAS STRIDE=1
118 : \endplumedfile
119 :
120 : In the following example, instead of using one uncertainty parameter per data point, we use
121 : a single uncertainty value in a long-tailed gaussian to account for outliers; furthermore,
122 : the data are weighted by the bias applied to other variables of the system.
123 :
124 : \plumedfile
125 : RDC ...
126 : LABEL=rdc
127 : SCALE=0.0001
128 : GYROM=-72.5388
129 : ATOMS1=22,23
130 : ATOMS2=25,27
131 : ATOMS3=29,31
132 : ATOMS4=33,34
133 : ... RDC
134 :
135 : cv1: TORSION ATOMS=1,2,3,4
136 : cv2: TORSION ATOMS=2,3,4,5
137 : mm: METAD ARG=cv1,cv2 HEIGHT=0.5 SIGMA=0.3,0.3 PACE=200 BIASFACTOR=8 WALKERS_MPI
138 :
139 : METAINFERENCE ...
140 : #SETTINGS NREPLICAS=2
141 : ARG=rdc.*,mm.bias
142 : REWEIGHT
143 : NOISETYPE=OUTLIERS
144 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
145 : SCALEDATA SCALE0=1 SCALE_MIN=0.1 SCALE_MAX=3 DSCALE=0.01
146 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
147 : SIGMA_MEAN0=0.001
148 : LABEL=spe
149 : ... METAINFERENCE
150 : \endplumedfile
151 :
152 : (See also \ref RDC, \ref PBMETAD).
153 :
154 : */
155 : //+ENDPLUMEDOC
156 :
157 : class Metainference : public bias::Bias
158 : {
159 : // experimental values
160 : std::vector<double> parameters;
161 : // noise type
162 : unsigned noise_type_;
163 : enum { GAUSS, MGAUSS, OUTLIERS, MOUTLIERS, GENERIC };
164 : unsigned gen_likelihood_;
165 : enum { LIKE_GAUSS, LIKE_LOGN };
166 : // scale is the data scaling factor
167 : // prior type for the scale
168 : unsigned scale_prior_;
169 : enum { SC_GAUSS, SC_FLAT };
170 : bool doscale_;
171 : double scale_;
172 : double scale_mu_;
173 : double scale_min_;
174 : double scale_max_;
175 : double Dscale_;
176 : // offset is the data offset
177 : // prior type for the offset
178 : unsigned offset_prior_;
179 : bool dooffset_;
180 : double offset_;
181 : double offset_mu_;
182 : double offset_min_;
183 : double offset_max_;
184 : double Doffset_;
185 : // scale and offset regression
186 : bool doregres_zero_;
187 : int nregres_zero_;
188 : // sigma is data uncertainty
189 : std::vector<double> sigma_;
190 : std::vector<double> sigma_min_;
191 : std::vector<double> sigma_max_;
192 : std::vector<double> Dsigma_;
193 : // sigma_mean is uncertainty in the mean estimate
194 : std::vector<double> sigma_mean2_;
195 : // this is the estimator of the mean value per replica for generic metainference
196 : std::vector<double> ftilde_;
197 : double Dftilde_;
198 :
199 : // k_B*T of the system, in energy units
200 : double kbt_;
201 :
202 : // Monte Carlo stuff
203 : std::vector<Random> random;
204 : unsigned MCsteps_;
205 : long unsigned MCaccept_;
206 : long unsigned MCacceptScale_;
207 : long unsigned MCacceptFT_;
208 : long unsigned MCtrial_;
209 : unsigned MCchunksize_;
210 :
211 : // output
212 : Value* valueScale;
213 : Value* valueOffset;
214 : Value* valueAccept;
215 : Value* valueAcceptScale;
216 : Value* valueAcceptFT;
217 : std::vector<Value*> valueSigma;
218 : std::vector<Value*> valueSigmaMean;
219 : std::vector<Value*> valueFtilde;
220 :
221 : // restart
222 : unsigned write_stride_;
223 : OFile sfile_;
224 :
225 : // others
226 : bool firstTime;
227 : std::vector<bool> firstTimeW;
228 : bool master;
229 : bool do_reweight_;
230 : unsigned do_optsigmamean_;
231 : unsigned nrep_;
232 : unsigned replica_;
233 : unsigned narg;
234 :
235 : // selector
236 : std::string selector_;
237 :
238 : // optimize sigma mean
239 : std::vector< std::vector < std::vector <double> > > sigma_mean2_last_;
240 : unsigned optsigmamean_stride_;
241 : // optimize sigma max
242 : unsigned N_optimized_step_;
243 : unsigned optimized_step_;
244 : bool sigmamax_opt_done_;
245 : std::vector<double> sigma_max_est_;
246 :
247 : // average weights
248 : unsigned average_weights_stride_;
249 : std::vector< std::vector <double> > average_weights_;
250 :
251 : double getEnergyMIGEN(const std::vector<double> &mean, const std::vector<double> &ftilde, const std::vector<double> &sigma,
252 : const double scale, const double offset);
253 : double getEnergySP(const std::vector<double> &mean, const std::vector<double> &sigma,
254 : const double scale, const double offset);
255 : double getEnergySPE(const std::vector<double> &mean, const std::vector<double> &sigma,
256 : const double scale, const double offset);
257 : double getEnergyGJ(const std::vector<double> &mean, const std::vector<double> &sigma,
258 : const double scale, const double offset);
259 : double getEnergyGJE(const std::vector<double> &mean, const std::vector<double> &sigma,
260 : const double scale, const double offset);
261 : void moveTilde(const std::vector<double> &mean_, double &old_energy);
262 : void moveScaleOffset(const std::vector<double> &mean_, double &old_energy);
263 : void moveSigmas(const std::vector<double> &mean_, double &old_energy, const unsigned i, const std::vector<unsigned> &indices, bool &breaknow);
264 : double doMonteCarlo(const std::vector<double> &mean);
265 : void getEnergyForceMIGEN(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
266 : void getEnergyForceSP(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
267 : void getEnergyForceSPE(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
268 : void getEnergyForceGJ(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
269 : void getEnergyForceGJE(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
270 : void get_weights(const unsigned iselect, double &weight, double &norm, double &neff);
271 : void replica_averaging(const double weight, const double norm, std::vector<double> &mean, std::vector<double> &dmean_b);
272 : void get_sigma_mean(const unsigned iselect, const double weight, const double norm, const double neff, const std::vector<double> &mean);
273 : void writeStatus();
274 : void do_regression_zero(const std::vector<double> &mean);
275 :
276 : public:
277 : explicit Metainference(const ActionOptions&);
278 : ~Metainference();
279 : void calculate() override;
280 : void update() override;
281 : static void registerKeywords(Keywords& keys);
282 : };
283 :
284 :
285 10457 : PLUMED_REGISTER_ACTION(Metainference,"METAINFERENCE")
286 :
287 20 : void Metainference::registerKeywords(Keywords& keys) {
288 20 : Bias::registerKeywords(keys);
289 20 : keys.use("ARG");
290 40 : keys.add("optional","PARARG","reference values for the experimental data, these can be provided as arguments without derivatives");
291 40 : keys.add("optional","PARAMETERS","reference values for the experimental data");
292 40 : keys.addFlag("NOENSEMBLE",false,"don't perform any replica-averaging");
293 40 : keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the latest ARG as energy");
294 40 : keys.add("optional","AVERAGING", "Stride for calculation of averaged weights and sigma_mean");
295 40 : keys.add("compulsory","NOISETYPE","MGAUSS","functional form of the noise (GAUSS,MGAUSS,OUTLIERS,MOUTLIERS,GENERIC)");
296 40 : keys.add("compulsory","LIKELIHOOD","GAUSS","the likelihood for the GENERIC metainference model, GAUSS or LOGN");
297 40 : keys.add("compulsory","DFTILDE","0.1","fraction of sigma_mean used to evolve ftilde");
298 40 : keys.addFlag("SCALEDATA",false,"Set to TRUE if you want to sample a scaling factor common to all values and replicas");
299 40 : keys.add("compulsory","SCALE0","1.0","initial value of the scaling factor");
300 40 : keys.add("compulsory","SCALE_PRIOR","FLAT","either FLAT or GAUSSIAN");
301 40 : keys.add("optional","SCALE_MIN","minimum value of the scaling factor");
302 40 : keys.add("optional","SCALE_MAX","maximum value of the scaling factor");
303 40 : keys.add("optional","DSCALE","maximum MC move of the scaling factor");
304 40 : keys.addFlag("ADDOFFSET",false,"Set to TRUE if you want to sample an offset common to all values and replicas");
305 40 : keys.add("compulsory","OFFSET0","0.0","initial value of the offset");
306 40 : keys.add("compulsory","OFFSET_PRIOR","FLAT","either FLAT or GAUSSIAN");
307 40 : keys.add("optional","OFFSET_MIN","minimum value of the offset");
308 40 : keys.add("optional","OFFSET_MAX","maximum value of the offset");
309 40 : keys.add("optional","DOFFSET","maximum MC move of the offset");
310 40 : keys.add("optional","REGRES_ZERO","stride for regression with zero offset");
311 40 : keys.add("compulsory","SIGMA0","1.0","initial value of the uncertainty parameter");
312 40 : keys.add("compulsory","SIGMA_MIN","0.0","minimum value of the uncertainty parameter");
313 40 : keys.add("compulsory","SIGMA_MAX","10.","maximum value of the uncertainty parameter");
314 40 : keys.add("optional","DSIGMA","maximum MC move of the uncertainty parameter");
315 40 : keys.add("compulsory","OPTSIGMAMEAN","NONE","Set to NONE/SEM to manually set sigma mean, or to estimate it on the fly");
316 40 : keys.add("optional","SIGMA_MEAN0","starting value for the uncertainty in the mean estimate");
317 40 : keys.add("optional","SIGMA_MAX_STEPS", "Number of steps used to optimise SIGMA_MAX, before that the SIGMA_MAX value is used");
318 40 : keys.add("optional","TEMP","the system temperature - this is only needed if code doesn't pass the temperature to plumed");
319 40 : keys.add("optional","MC_STEPS","number of MC steps");
320 40 : keys.add("optional","MC_CHUNKSIZE","MC chunksize");
321 40 : keys.add("optional","STATUS_FILE","write a file with all the data useful for restart/continuation of Metainference");
322 40 : keys.add("compulsory","WRITE_STRIDE","10000","write the status to a file every N steps, this can be used for restart/continuation");
323 40 : keys.add("optional","SELECTOR","name of selector");
324 40 : keys.add("optional","NSELECT","range of values for selector [0, N-1]");
325 20 : keys.use("RESTART");
326 40 : keys.addOutputComponent("sigma", "default", "uncertainty parameter");
327 40 : keys.addOutputComponent("sigmaMean", "default", "uncertainty in the mean estimate");
328 40 : keys.addOutputComponent("neff", "default", "effective number of replicas");
329 40 : keys.addOutputComponent("acceptSigma", "default", "MC acceptance for sigma values");
330 40 : keys.addOutputComponent("acceptScale", "SCALEDATA", "MC acceptance for scale value");
331 40 : keys.addOutputComponent("acceptFT", "GENERIC", "MC acceptance for general metainference f tilde value");
332 40 : keys.addOutputComponent("weight", "REWEIGHT", "weights of the weighted average");
333 40 : keys.addOutputComponent("biasDer", "REWEIGHT", "derivatives with respect to the bias");
334 40 : keys.addOutputComponent("scale", "SCALEDATA", "scale parameter");
335 40 : keys.addOutputComponent("offset", "ADDOFFSET", "offset parameter");
336 40 : keys.addOutputComponent("ftilde", "GENERIC", "ensemble average estimator");
337 20 : }
338 :
339 19 : Metainference::Metainference(const ActionOptions&ao):
340 : PLUMED_BIAS_INIT(ao),
341 19 : doscale_(false),
342 19 : scale_(1.),
343 19 : scale_mu_(0),
344 19 : scale_min_(1),
345 19 : scale_max_(-1),
346 19 : Dscale_(-1),
347 19 : dooffset_(false),
348 19 : offset_(0.),
349 19 : offset_mu_(0),
350 19 : offset_min_(1),
351 19 : offset_max_(-1),
352 19 : Doffset_(-1),
353 19 : doregres_zero_(false),
354 19 : nregres_zero_(0),
355 19 : Dftilde_(0.1),
356 19 : random(3),
357 19 : MCsteps_(1),
358 19 : MCaccept_(0),
359 19 : MCacceptScale_(0),
360 19 : MCacceptFT_(0),
361 19 : MCtrial_(0),
362 19 : MCchunksize_(0),
363 19 : write_stride_(0),
364 19 : firstTime(true),
365 19 : do_reweight_(false),
366 19 : do_optsigmamean_(0),
367 19 : optsigmamean_stride_(0),
368 19 : N_optimized_step_(0),
369 19 : optimized_step_(0),
370 19 : sigmamax_opt_done_(false),
371 19 : average_weights_stride_(1)
372 : {
373 19 : bool noensemble = false;
374 19 : parseFlag("NOENSEMBLE", noensemble);
375 :
376 : // set up replica stuff
377 19 : master = (comm.Get_rank()==0);
378 19 : if(master) {
379 11 : nrep_ = multi_sim_comm.Get_size();
380 11 : replica_ = multi_sim_comm.Get_rank();
381 11 : if(noensemble) nrep_ = 1;
382 : } else {
383 8 : nrep_ = 0;
384 8 : replica_ = 0;
385 : }
386 19 : comm.Sum(&nrep_,1);
387 19 : comm.Sum(&replica_,1);
388 :
389 19 : unsigned nsel = 1;
390 19 : parse("SELECTOR", selector_);
391 38 : parse("NSELECT", nsel);
392 : // do checks
393 19 : if(selector_.length()>0 && nsel<=1) error("With SELECTOR active, NSELECT must be greater than 1");
394 19 : if(selector_.length()==0 && nsel>1) error("With NSELECT greater than 1, you must specify SELECTOR");
395 :
396 : // initialise firstTimeW
397 19 : firstTimeW.resize(nsel, true);
398 :
399 : // reweight implies a different number of arguments (the last one must always be the bias)
400 19 : parseFlag("REWEIGHT", do_reweight_);
401 19 : if(do_reweight_&&nrep_<2) error("REWEIGHT can only be used in parallel with 2 or more replicas");
402 34 : if(!getRestart()) average_weights_.resize(nsel, std::vector<double> (nrep_, 1./static_cast<double>(nrep_)));
403 8 : else average_weights_.resize(nsel, std::vector<double> (nrep_, 0.));
404 19 : narg = getNumberOfArguments();
405 19 : if(do_reweight_) narg--;
406 :
407 19 : unsigned averaging=0;
408 19 : parse("AVERAGING", averaging);
409 19 : if(averaging>0) {
410 0 : average_weights_stride_ = averaging;
411 0 : optsigmamean_stride_ = averaging;
412 : }
413 :
414 38 : parseVector("PARAMETERS",parameters);
415 19 : if(parameters.size()!=static_cast<unsigned>(narg)&&!parameters.empty())
416 0 : error("Size of PARAMETERS array should be either 0 or the same as of the number of arguments in ARG1");
417 :
418 : std::vector<Value*> arg2;
419 38 : parseArgumentList("PARARG",arg2);
420 19 : if(!arg2.empty()) {
421 4 : if(parameters.size()>0) error("It is not possible to use PARARG and PARAMETERS together");
422 4 : if(arg2.size()!=narg) error("Size of PARARG array should be the same as the number of arguments in ARG");
423 2360 : for(unsigned i=0; i<arg2.size(); i++) {
424 2356 : parameters.push_back(arg2[i]->get());
425 2356 : if(arg2[i]->hasDerivatives()==true) error("PARARG can only accept arguments without derivatives");
426 : }
427 : }
428 :
429 19 : if(parameters.size()!=narg)
430 0 : error("PARARG or PARAMETERS arrays should include the same number of elements as the arguments in ARG");
431 :
432 : std::string stringa_noise;
433 38 : parse("NOISETYPE",stringa_noise);
434 19 : if(stringa_noise=="GAUSS") noise_type_ = GAUSS;
435 18 : else if(stringa_noise=="MGAUSS") noise_type_ = MGAUSS;
436 10 : else if(stringa_noise=="OUTLIERS") noise_type_ = OUTLIERS;
437 5 : else if(stringa_noise=="MOUTLIERS") noise_type_ = MOUTLIERS;
438 1 : else if(stringa_noise=="GENERIC") noise_type_ = GENERIC;
439 0 : else error("Unknown noise type!");
440 :
441 19 : if(noise_type_== GENERIC) {
442 : std::string stringa_like;
443 2 : parse("LIKELIHOOD",stringa_like);
444 1 : if(stringa_like=="GAUSS") gen_likelihood_ = LIKE_GAUSS;
445 0 : else if(stringa_like=="LOGN") gen_likelihood_ = LIKE_LOGN;
446 0 : else error("Unknown likelihood type!");
447 :
448 2 : parse("DFTILDE",Dftilde_);
449 : }
450 :
451 38 : parse("WRITE_STRIDE",write_stride_);
452 : std::string status_file_name_;
453 38 : parse("STATUS_FILE",status_file_name_);
454 38 : if(status_file_name_=="") status_file_name_ = "MISTATUS"+getLabel();
455 0 : else status_file_name_ = status_file_name_+getLabel();
456 :
457 : std::string stringa_optsigma;
458 38 : parse("OPTSIGMAMEAN", stringa_optsigma);
459 19 : if(stringa_optsigma=="NONE") do_optsigmamean_=0;
460 0 : else if(stringa_optsigma=="SEM") do_optsigmamean_=1;
461 0 : else if(stringa_optsigma=="SEM_MAX") do_optsigmamean_=2;
462 :
463 19 : unsigned aver_max_steps=0;
464 19 : parse("SIGMA_MAX_STEPS", aver_max_steps);
465 19 : if(aver_max_steps==0&&do_optsigmamean_==2) aver_max_steps=averaging*2000;
466 19 : if(aver_max_steps>0&&do_optsigmamean_<2) error("SIGMA_MAX_STEPS can only be used together with OPTSIGMAMEAN=SEM_MAX");
467 19 : if(aver_max_steps>0&&do_optsigmamean_==2) N_optimized_step_=aver_max_steps;
468 19 : if(aver_max_steps>0&&aver_max_steps<averaging) error("SIGMA_MAX_STEPS must be greater than AVERAGING");
469 :
470 : // resize std::vector for sigma_mean history
471 19 : sigma_mean2_last_.resize(nsel);
472 38 : for(unsigned i=0; i<nsel; i++) sigma_mean2_last_[i].resize(narg);
473 :
474 : std::vector<double> read_sigma_mean_;
475 19 : parseVector("SIGMA_MEAN0",read_sigma_mean_);
476 19 : if(do_optsigmamean_==0 && read_sigma_mean_.size()==0 && !getRestart())
477 0 : error("If you don't use OPTSIGMAMEAN and you are not RESTARTING then you MUST SET SIGMA_MEAN0");
478 :
479 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
480 13 : if(read_sigma_mean_.size()==narg) {
481 0 : sigma_mean2_.resize(narg);
482 0 : for(unsigned i=0; i<narg; i++) sigma_mean2_[i]=read_sigma_mean_[i]*read_sigma_mean_[i];
483 13 : } else if(read_sigma_mean_.size()==1) {
484 13 : sigma_mean2_.resize(narg,read_sigma_mean_[0]*read_sigma_mean_[0]);
485 0 : } else if(read_sigma_mean_.size()==0) {
486 0 : sigma_mean2_.resize(narg,0.000001);
487 : } else {
488 0 : error("SIGMA_MEAN0 can accept either one single value or as many values as the arguments (with NOISETYPE=MGAUSS|MOUTLIERS)");
489 : }
490 : // set the initial value for the history
491 2416 : for(unsigned i=0; i<nsel; i++) for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j].push_back(sigma_mean2_[j]);
492 : } else {
493 6 : if(read_sigma_mean_.size()==1) {
494 6 : sigma_mean2_.resize(1, read_sigma_mean_[0]*read_sigma_mean_[0]);
495 0 : } else if(read_sigma_mean_.size()==0) {
496 0 : sigma_mean2_.resize(1, 0.000001);
497 : } else {
498 0 : error("If you want to use more than one SIGMA_MEAN0 you should use NOISETYPE=MGAUSS|MOUTLIERS");
499 : }
500 : // set the initial value for the history
501 37 : for(unsigned i=0; i<nsel; i++) for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j].push_back(sigma_mean2_[0]);
502 : }
503 :
504 19 : parseFlag("SCALEDATA", doscale_);
505 19 : if(doscale_) {
506 : std::string stringa_noise;
507 24 : parse("SCALE_PRIOR",stringa_noise);
508 12 : if(stringa_noise=="GAUSSIAN") scale_prior_ = SC_GAUSS;
509 12 : else if(stringa_noise=="FLAT") scale_prior_ = SC_FLAT;
510 0 : else error("Unknown SCALE_PRIOR type!");
511 12 : parse("SCALE0",scale_);
512 12 : parse("DSCALE",Dscale_);
513 12 : if(Dscale_<0.) error("DSCALE must be set when using SCALEDATA");
514 12 : if(scale_prior_==SC_GAUSS) {
515 0 : scale_mu_=scale_;
516 : } else {
517 12 : parse("SCALE_MIN",scale_min_);
518 12 : parse("SCALE_MAX",scale_max_);
519 12 : if(scale_max_<scale_min_) error("SCALE_MAX and SCALE_MIN must be set when using SCALE_PRIOR=FLAT");
520 : }
521 : }
522 :
523 19 : parseFlag("ADDOFFSET", dooffset_);
524 19 : if(dooffset_) {
525 : std::string stringa_noise;
526 4 : parse("OFFSET_PRIOR",stringa_noise);
527 2 : if(stringa_noise=="GAUSSIAN") offset_prior_ = SC_GAUSS;
528 2 : else if(stringa_noise=="FLAT") offset_prior_ = SC_FLAT;
529 0 : else error("Unknown OFFSET_PRIOR type!");
530 2 : parse("OFFSET0",offset_);
531 2 : parse("DOFFSET",Doffset_);
532 2 : if(offset_prior_==SC_GAUSS) {
533 0 : offset_mu_=offset_;
534 0 : if(Doffset_<0.) error("DOFFSET must be set when using OFFSET_PRIOR=GAUSS");
535 : } else {
536 2 : parse("OFFSET_MIN",offset_min_);
537 2 : parse("OFFSET_MAX",offset_max_);
538 2 : if(Doffset_<0) Doffset_ = 0.05*(offset_max_ - offset_min_);
539 2 : if(offset_max_<offset_min_) error("OFFSET_MAX and OFFSET_MIN must be set when using OFFSET_PRIOR=FLAT");
540 : }
541 : }
542 :
543 : // regression with zero intercept
544 19 : parse("REGRES_ZERO", nregres_zero_);
545 19 : if(nregres_zero_>0) {
546 : // set flag
547 0 : doregres_zero_=true;
548 : // check if already sampling scale and offset
549 0 : if(doscale_) error("REGRES_ZERO and SCALEDATA are mutually exclusive");
550 0 : if(dooffset_) error("REGRES_ZERO and ADDOFFSET are mutually exclusive");
551 : }
552 :
553 : std::vector<double> readsigma;
554 19 : parseVector("SIGMA0",readsigma);
555 19 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma.size()>1)
556 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
557 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
558 13 : sigma_.resize(readsigma.size());
559 13 : sigma_=readsigma;
560 6 : } else sigma_.resize(1, readsigma[0]);
561 :
562 : std::vector<double> readsigma_min;
563 19 : parseVector("SIGMA_MIN",readsigma_min);
564 19 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_min.size()>1)
565 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
566 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
567 13 : sigma_min_.resize(readsigma_min.size());
568 13 : sigma_min_=readsigma_min;
569 6 : } else sigma_min_.resize(1, readsigma_min[0]);
570 :
571 : std::vector<double> readsigma_max;
572 19 : parseVector("SIGMA_MAX",readsigma_max);
573 19 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_max.size()>1)
574 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
575 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
576 13 : sigma_max_.resize(readsigma_max.size());
577 13 : sigma_max_=readsigma_max;
578 6 : } else sigma_max_.resize(1, readsigma_max[0]);
579 :
580 19 : if(sigma_max_.size()!=sigma_min_.size()) error("The number of values for SIGMA_MIN and SIGMA_MAX must be the same");
581 :
582 : std::vector<double> read_dsigma;
583 19 : parseVector("DSIGMA",read_dsigma);
584 19 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_max.size()>1)
585 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
586 19 : if(read_dsigma.size()>0) {
587 19 : Dsigma_.resize(read_dsigma.size());
588 19 : Dsigma_=read_dsigma;
589 : } else {
590 0 : Dsigma_.resize(sigma_max_.size(), -1.);
591 : /* in this case Dsigma is initialised after reading the restart file if present */
592 : }
593 :
594 : // monte carlo stuff
595 19 : parse("MC_STEPS",MCsteps_);
596 19 : parse("MC_CHUNKSIZE", MCchunksize_);
597 : // get temperature
598 19 : double temp=0.0;
599 19 : parse("TEMP",temp);
600 19 : if(temp>0.0) kbt_=plumed.getAtoms().getKBoltzmann()*temp;
601 0 : else kbt_=plumed.getAtoms().getKbT();
602 19 : if(kbt_==0.0) error("Unless the MD engine passes the temperature to plumed, you must specify it using TEMP");
603 :
604 19 : checkRead();
605 :
606 : // set sigma_bias
607 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
608 13 : if(sigma_.size()==1) {
609 13 : double tmp = sigma_[0];
610 13 : sigma_.resize(narg, tmp);
611 0 : } else if(sigma_.size()>1&&sigma_.size()!=narg) {
612 0 : error("SIGMA0 can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
613 : }
614 13 : if(sigma_min_.size()==1) {
615 13 : double tmp = sigma_min_[0];
616 13 : sigma_min_.resize(narg, tmp);
617 0 : } else if(sigma_min_.size()>1&&sigma_min_.size()!=narg) {
618 0 : error("SIGMA_MIN can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
619 : }
620 13 : if(sigma_max_.size()==1) {
621 13 : double tmp = sigma_max_[0];
622 13 : sigma_max_.resize(narg, tmp);
623 0 : } else if(sigma_max_.size()>1&&sigma_max_.size()!=narg) {
624 0 : error("SIGMA_MAX can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
625 : }
626 13 : if(Dsigma_.size()==1) {
627 13 : double tmp = Dsigma_[0];
628 13 : Dsigma_.resize(narg, tmp);
629 0 : } else if(Dsigma_.size()>1&&Dsigma_.size()!=narg) {
630 0 : error("DSIGMA can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
631 : }
632 : }
633 :
634 19 : sigma_max_est_.resize(sigma_max_.size(), 0.);
635 :
636 19 : IFile restart_sfile;
637 19 : restart_sfile.link(*this);
638 19 : if(getRestart()&&restart_sfile.FileExist(status_file_name_)) {
639 4 : firstTime = false;
640 8 : for(unsigned i=0; i<nsel; i++) firstTimeW[i] = false;
641 4 : restart_sfile.open(status_file_name_);
642 4 : log.printf(" Restarting from %s\n", status_file_name_.c_str());
643 : double dummy;
644 8 : if(restart_sfile.scanField("time",dummy)) {
645 : // check for synchronisation
646 4 : std::vector<double> dummy_time(nrep_,0);
647 4 : if(master&&nrep_>1) {
648 2 : dummy_time[replica_] = dummy;
649 2 : multi_sim_comm.Sum(dummy_time);
650 : }
651 4 : comm.Sum(dummy_time);
652 8 : for(unsigned i=1; i<nrep_; i++) {
653 4 : std::string msg = "METAINFERENCE restart files " + status_file_name_ + " are not in sync";
654 4 : if(dummy_time[i]!=dummy_time[0]) plumed_merror(msg);
655 : }
656 : // nsel
657 8 : for(unsigned i=0; i<sigma_mean2_last_.size(); i++) {
658 : std::string msg_i;
659 4 : Tools::convert(i,msg_i);
660 : // narg
661 4 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
662 20 : for(unsigned j=0; j<narg; ++j) {
663 : std::string msg_j;
664 16 : Tools::convert(j,msg_j);
665 16 : std::string msg = msg_i+"_"+msg_j;
666 : double read_sm;
667 16 : restart_sfile.scanField("sigmaMean_"+msg,read_sm);
668 16 : sigma_mean2_last_[i][j][0] = read_sm*read_sm;
669 : }
670 : }
671 4 : if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
672 : double read_sm;
673 : std::string msg_j;
674 0 : Tools::convert(0,msg_j);
675 0 : std::string msg = msg_i+"_"+msg_j;
676 0 : restart_sfile.scanField("sigmaMean_"+msg,read_sm);
677 0 : for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j][0] = read_sm*read_sm;
678 : }
679 : }
680 :
681 20 : for(unsigned i=0; i<sigma_.size(); ++i) {
682 : std::string msg;
683 16 : Tools::convert(i,msg);
684 32 : restart_sfile.scanField("sigma_"+msg,sigma_[i]);
685 : }
686 20 : for(unsigned i=0; i<sigma_max_.size(); ++i) {
687 : std::string msg;
688 16 : Tools::convert(i,msg);
689 16 : restart_sfile.scanField("sigma_max_"+msg,sigma_max_[i]);
690 16 : sigmamax_opt_done_=true;
691 : }
692 4 : if(noise_type_==GENERIC) {
693 0 : for(unsigned i=0; i<ftilde_.size(); ++i) {
694 : std::string msg;
695 0 : Tools::convert(i,msg);
696 0 : restart_sfile.scanField("ftilde_"+msg,ftilde_[i]);
697 : }
698 : }
699 4 : restart_sfile.scanField("scale0_",scale_);
700 4 : restart_sfile.scanField("offset0_",offset_);
701 :
702 8 : for(unsigned i=0; i<nsel; i++) {
703 : std::string msg;
704 4 : Tools::convert(i,msg);
705 : double tmp_w;
706 4 : restart_sfile.scanField("weight_"+msg,tmp_w);
707 4 : if(master) {
708 2 : average_weights_[i][replica_] = tmp_w;
709 2 : if(nrep_>1) multi_sim_comm.Sum(&average_weights_[i][0], nrep_);
710 : }
711 4 : comm.Sum(&average_weights_[i][0], nrep_);
712 : }
713 :
714 : }
715 4 : restart_sfile.scanField();
716 4 : restart_sfile.close();
717 : }
718 :
719 : /* If DSIGMA is not yet initialised do it now */
720 2415 : for(unsigned i=0; i<sigma_max_.size(); i++) if(Dsigma_[i]==-1) Dsigma_[i] = 0.05*(sigma_max_[i] - sigma_min_[i]);
721 :
722 19 : switch(noise_type_) {
723 1 : case GENERIC:
724 1 : log.printf(" with general metainference ");
725 1 : if(gen_likelihood_==LIKE_GAUSS) log.printf(" and a gaussian likelihood\n");
726 0 : else if(gen_likelihood_==LIKE_LOGN) log.printf(" and a log-normal likelihood\n");
727 1 : log.printf(" ensemble average parameter sampled with a step %lf of sigma_mean\n", Dftilde_);
728 : break;
729 1 : case GAUSS:
730 1 : log.printf(" with gaussian noise and a single noise parameter for all the data\n");
731 : break;
732 8 : case MGAUSS:
733 8 : log.printf(" with gaussian noise and a noise parameter for each data point\n");
734 : break;
735 5 : case OUTLIERS:
736 5 : log.printf(" with long tailed gaussian noise and a single noise parameter for all the data\n");
737 : break;
738 4 : case MOUTLIERS:
739 4 : log.printf(" with long tailed gaussian noise and a noise parameter for each data point\n");
740 : break;
741 : }
742 :
743 19 : if(doscale_) {
744 : // check that the scale value is the same for all replicas
745 12 : std::vector<double> dummy_scale(nrep_,0);
746 12 : if(master&&nrep_>1) {
747 6 : dummy_scale[replica_] = scale_;
748 6 : multi_sim_comm.Sum(dummy_scale);
749 : }
750 12 : comm.Sum(dummy_scale);
751 24 : for(unsigned i=1; i<nrep_; i++) {
752 12 : std::string msg = "The SCALE value must be the same for all replicas: check your input or restart file";
753 12 : if(dummy_scale[i]!=dummy_scale[0]) plumed_merror(msg);
754 : }
755 12 : log.printf(" sampling a common scaling factor with:\n");
756 12 : log.printf(" initial scale parameter %f\n",scale_);
757 12 : if(scale_prior_==SC_GAUSS) {
758 0 : log.printf(" gaussian prior with mean %f and width %f\n",scale_mu_,Dscale_);
759 : }
760 12 : if(scale_prior_==SC_FLAT) {
761 12 : log.printf(" flat prior between %f - %f\n",scale_min_,scale_max_);
762 12 : log.printf(" maximum MC move of scale parameter %f\n",Dscale_);
763 : }
764 : }
765 :
766 19 : if(dooffset_) {
767 : // check that the offset value is the same for all replicas
768 2 : std::vector<double> dummy_offset(nrep_,0);
769 2 : if(master&&nrep_>1) {
770 0 : dummy_offset[replica_] = offset_;
771 0 : multi_sim_comm.Sum(dummy_offset);
772 : }
773 2 : comm.Sum(dummy_offset);
774 2 : for(unsigned i=1; i<nrep_; i++) {
775 0 : std::string msg = "The OFFSET value must be the same for all replicas: check your input or restart file";
776 0 : if(dummy_offset[i]!=dummy_offset[0]) plumed_merror(msg);
777 : }
778 2 : log.printf(" sampling a common offset with:\n");
779 2 : log.printf(" initial offset parameter %f\n",offset_);
780 2 : if(offset_prior_==SC_GAUSS) {
781 0 : log.printf(" gaussian prior with mean %f and width %f\n",offset_mu_,Doffset_);
782 : }
783 2 : if(offset_prior_==SC_FLAT) {
784 2 : log.printf(" flat prior between %f - %f\n",offset_min_,offset_max_);
785 2 : log.printf(" maximum MC move of offset parameter %f\n",Doffset_);
786 : }
787 : }
788 :
789 19 : if(doregres_zero_)
790 0 : log.printf(" doing regression with zero intercept with stride: %d\n", nregres_zero_);
791 :
792 19 : log.printf(" number of experimental data points %u\n",narg);
793 19 : log.printf(" number of replicas %u\n",nrep_);
794 19 : log.printf(" initial data uncertainties");
795 2415 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f", sigma_[i]);
796 19 : log.printf("\n");
797 19 : log.printf(" minimum data uncertainties");
798 2415 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",sigma_min_[i]);
799 19 : log.printf("\n");
800 19 : log.printf(" maximum data uncertainties");
801 2415 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",sigma_max_[i]);
802 19 : log.printf("\n");
803 19 : log.printf(" maximum MC move of data uncertainties");
804 2415 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",Dsigma_[i]);
805 19 : log.printf("\n");
806 19 : log.printf(" temperature of the system %f\n",kbt_);
807 19 : log.printf(" MC steps %u\n",MCsteps_);
808 19 : log.printf(" initial standard errors of the mean");
809 2415 : for(unsigned i=0; i<sigma_mean2_.size(); ++i) log.printf(" %f", std::sqrt(sigma_mean2_[i]));
810 19 : log.printf("\n");
811 :
812 19 : if(do_reweight_) {
813 16 : addComponent("biasDer");
814 16 : componentIsNotPeriodic("biasDer");
815 16 : addComponent("weight");
816 32 : componentIsNotPeriodic("weight");
817 : }
818 :
819 19 : addComponent("neff");
820 19 : componentIsNotPeriodic("neff");
821 :
822 19 : if(doscale_ || doregres_zero_) {
823 12 : addComponent("scale");
824 12 : componentIsNotPeriodic("scale");
825 12 : valueScale=getPntrToComponent("scale");
826 : }
827 :
828 19 : if(dooffset_) {
829 2 : addComponent("offset");
830 2 : componentIsNotPeriodic("offset");
831 2 : valueOffset=getPntrToComponent("offset");
832 : }
833 :
834 19 : if(dooffset_||doscale_) {
835 14 : addComponent("acceptScale");
836 14 : componentIsNotPeriodic("acceptScale");
837 14 : valueAcceptScale=getPntrToComponent("acceptScale");
838 : }
839 :
840 19 : if(noise_type_==GENERIC) {
841 1 : addComponent("acceptFT");
842 1 : componentIsNotPeriodic("acceptFT");
843 1 : valueAcceptFT=getPntrToComponent("acceptFT");
844 : }
845 :
846 19 : addComponent("acceptSigma");
847 19 : componentIsNotPeriodic("acceptSigma");
848 19 : valueAccept=getPntrToComponent("acceptSigma");
849 :
850 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
851 2403 : for(unsigned i=0; i<sigma_mean2_.size(); ++i) {
852 2390 : std::string num; Tools::convert(i,num);
853 4780 : addComponent("sigmaMean-"+num); componentIsNotPeriodic("sigmaMean-"+num);
854 2390 : valueSigmaMean.push_back(getPntrToComponent("sigmaMean-"+num));
855 2390 : getPntrToComponent("sigmaMean-"+num)->set(std::sqrt(sigma_mean2_[i]));
856 4780 : addComponent("sigma-"+num); componentIsNotPeriodic("sigma-"+num);
857 2390 : valueSigma.push_back(getPntrToComponent("sigma-"+num));
858 2390 : getPntrToComponent("sigma-"+num)->set(sigma_[i]);
859 2390 : if(noise_type_==GENERIC) {
860 4 : addComponent("ftilde-"+num); componentIsNotPeriodic("ftilde-"+num);
861 2 : valueFtilde.push_back(getPntrToComponent("ftilde-"+num));
862 : }
863 : }
864 13 : } else {
865 12 : addComponent("sigmaMean"); componentIsNotPeriodic("sigmaMean");
866 6 : valueSigmaMean.push_back(getPntrToComponent("sigmaMean"));
867 6 : getPntrToComponent("sigmaMean")->set(std::sqrt(sigma_mean2_[0]));
868 12 : addComponent("sigma"); componentIsNotPeriodic("sigma");
869 6 : valueSigma.push_back(getPntrToComponent("sigma"));
870 12 : getPntrToComponent("sigma")->set(sigma_[0]);
871 : }
872 :
873 : // initialize random seed
874 : unsigned iseed;
875 19 : if(master) {
876 11 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
877 11 : iseed = static_cast<unsigned>(ts)+replica_;
878 : } else {
879 8 : iseed = 0;
880 : }
881 19 : comm.Sum(&iseed, 1);
882 : // this is used for ftilde and sigma both the move and the acceptance
883 : // this is different for each replica
884 19 : random[0].setSeed(-iseed);
885 19 : if(doscale_||dooffset_) {
886 : // in this case we want the same seed everywhere
887 14 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
888 14 : iseed = static_cast<unsigned>(ts);
889 14 : if(master&&nrep_>1) multi_sim_comm.Bcast(iseed,0);
890 14 : comm.Bcast(iseed,0);
891 : // this is used for scale and offset sampling and acceptance
892 14 : random[1].setSeed(-iseed);
893 : }
894 : // this is used for random chunk of sigmas, and it is different for each replica
895 19 : if(master) {
896 11 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
897 11 : iseed = static_cast<unsigned>(ts)+replica_;
898 : } else {
899 8 : iseed = 0;
900 : }
901 19 : comm.Sum(&iseed, 1);
902 19 : random[2].setSeed(-iseed);
903 :
904 : // outfile stuff
905 19 : if(write_stride_>0) {
906 19 : sfile_.link(*this);
907 19 : sfile_.open(status_file_name_);
908 : }
909 :
910 38 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
911 51 : if(do_reweight_) log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
912 19 : if(do_optsigmamean_>0) log<<plumed.cite("Loehr, Jussupow, Camilloni, J. Chem. Phys. 146, 165102 (2017)");
913 38 : log<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
914 19 : log<<"\n";
915 38 : }
916 :
917 38 : Metainference::~Metainference()
918 : {
919 19 : if(sfile_.isOpen()) sfile_.close();
920 114 : }
921 :
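 : /* getEnergySP: energy of the OUTLIERS noise model, a single long-tailed gaussian
 :    common to all data points. Each point contributes log(2*a2/(1-exp(-a2/sm2)))
 :    with a2 = 0.5*dev^2 + ss2; a single Jeffreys prior on sigma and one
 :    normalisation per data point are added after the loop. */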
922 156 : double Metainference::getEnergySP(const std::vector<double> &mean, const std::vector<double> &sigma,
923 : const double scale, const double offset)
924 : {
925 156 : const double scale2 = scale*scale;
926 156 : const double sm2 = sigma_mean2_[0];
927 156 : const double ss2 = sigma[0]*sigma[0] + scale2*sm2;
928 156 : const double sss = sigma[0]*sigma[0] + sm2;
929 :
930 : double ene = 0.0;
931 156 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
932 : {
933 : #pragma omp for reduction( + : ene)
934 : for(unsigned i=0; i<narg; ++i) {
935 : const double dev = scale*mean[i]-parameters[i]+offset;
936 : const double a2 = 0.5*dev*dev + ss2;
937 : if(sm2 > 0.0) {
938 : ene += std::log(2.0*a2/(1.0-std::exp(-a2/sm2)));
939 : }
940 : else {
941 : ene += std::log(2.0*a2);
942 : }
943 : }
944 : }
945 : // add one single Jeffrey's prior and one normalisation per data point
946 156 : ene += 0.5*std::log(sss) + static_cast<double>(narg)*0.5*std::log(0.5*M_PI*M_PI/ss2);
947 156 : if(doscale_ || doregres_zero_) ene += 0.5*std::log(sss);
948 156 : if(dooffset_) ene += 0.5*std::log(sss);
949 156 : return kbt_ * ene;
950 : }
951 :
952 144 : double Metainference::getEnergySPE(const std::vector<double> &mean, const std::vector<double> &sigma,
953 : const double scale, const double offset)
954 : {
955 144 : const double scale2 = scale*scale;
956 : double ene = 0.0;
957 144 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
958 : {
959 : #pragma omp for reduction( + : ene)
960 : for(unsigned i=0; i<narg; ++i) {
961 : const double sm2 = sigma_mean2_[i];
962 : const double ss2 = sigma[i]*sigma[i] + scale2*sm2;
963 : const double sss = sigma[i]*sigma[i] + sm2;
964 : const double dev = scale*mean[i]-parameters[i]+offset;
965 : const double a2 = 0.5*dev*dev + ss2;
966 : if(sm2 > 0.0) {
967 : ene += 0.5*std::log(sss) + 0.5*std::log(0.5*M_PI*M_PI/ss2) + std::log(2.0*a2/(1.0-std::exp(-a2/sm2)));
968 : }
969 : else {
970 : ene += 0.5*std::log(sss) + 0.5*std::log(0.5*M_PI*M_PI/ss2) + std::log(2.0*a2);
971 : }
972 : if(doscale_ || doregres_zero_) ene += 0.5*std::log(sss);
973 : if(dooffset_) ene += 0.5*std::log(sss);
974 : }
975 : }
976 144 : return kbt_ * ene;
977 : }
978 :
979 48 : double Metainference::getEnergyMIGEN(const std::vector<double> &mean, const std::vector<double> &ftilde, const std::vector<double> &sigma,
980 : const double scale, const double offset)
981 : {
982 : double ene = 0.0;
983 48 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
984 : {
985 : #pragma omp for reduction( + : ene)
986 : for(unsigned i=0; i<narg; ++i) {
987 : const double inv_sb2 = 1./(sigma[i]*sigma[i]);
988 : const double inv_sm2 = 1./sigma_mean2_[i];
989 : double devb = 0;
990 : if(gen_likelihood_==LIKE_GAUSS) devb = scale*ftilde[i]-parameters[i]+offset;
991 : else if(gen_likelihood_==LIKE_LOGN) devb = std::log(scale*ftilde[i]/parameters[i]);
992 : double devm = mean[i] - ftilde[i];
993 : // deviation + normalisation + jeffrey
994 : double normb = 0.;
995 : if(gen_likelihood_==LIKE_GAUSS) normb = -0.5*std::log(0.5/M_PI*inv_sb2);
996 : else if(gen_likelihood_==LIKE_LOGN) normb = -0.5*std::log(0.5/M_PI*inv_sb2/(parameters[i]*parameters[i]));
997 : const double normm = -0.5*std::log(0.5/M_PI*inv_sm2);
998 : const double jeffreys = -0.5*std::log(2.*inv_sb2);
999 : ene += 0.5*devb*devb*inv_sb2 + 0.5*devm*devm*inv_sm2 + normb + normm + jeffreys;
1000 : if(doscale_ || doregres_zero_) ene += jeffreys;
1001 : if(dooffset_) ene += jeffreys;
1002 : }
1003 : }
1004 48 : return kbt_ * ene;
1005 : }
1006 :
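 : /* getEnergyGJ: energy of the GAUSS noise model, one gaussian common to all data
 :    points: sum_i 0.5*dev_i^2/(sigma^2 + scale^2*sigma_mean^2), plus one
 :    normalisation per point and a single Jeffreys prior on sigma. */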
1007 36 : double Metainference::getEnergyGJ(const std::vector<double> &mean, const std::vector<double> &sigma,
1008 : const double scale, const double offset)
1009 : {
1010 36 : const double scale2 = scale*scale;
1011 36 : const double inv_s2 = 1./(sigma[0]*sigma[0] + scale2*sigma_mean2_[0]);
1012 36 : const double inv_sss = 1./(sigma[0]*sigma[0] + sigma_mean2_[0]);
1013 :
1014 : double ene = 0.0;
1015 36 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
1016 : {
1017 : #pragma omp for reduction( + : ene)
1018 : for(unsigned i=0; i<narg; ++i) {
1019 : double dev = scale*mean[i]-parameters[i]+offset;
1020 : ene += 0.5*dev*dev*inv_s2;
1021 : }
1022 : }
1023 36 : const double normalisation = -0.5*std::log(0.5/M_PI*inv_s2);
1024 36 : const double jeffreys = -0.5*std::log(2.*inv_sss);
1025 : // add Jeffrey's prior in case one sigma for all data points + one normalisation per datapoint
1026 36 : ene += jeffreys + static_cast<double>(narg)*normalisation;
1027 36 : if(doscale_ || doregres_zero_) ene += jeffreys;
1028 36 : if(dooffset_) ene += jeffreys;
1029 :
1030 36 : return kbt_ * ene;
1031 : }
1032 :
1033 152 : double Metainference::getEnergyGJE(const std::vector<double> &mean, const std::vector<double> &sigma,
1034 : const double scale, const double offset)
1035 : {
1036 152 : const double scale2 = scale*scale;
1037 :
1038 : double ene = 0.0;
1039 152 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
1040 : {
1041 : #pragma omp for reduction( + : ene)
1042 : for(unsigned i=0; i<narg; ++i) {
1043 : const double inv_s2 = 1./(sigma[i]*sigma[i] + scale2*sigma_mean2_[i]);
1044 : const double inv_sss = 1./(sigma[i]*sigma[i] + sigma_mean2_[i]);
1045 : double dev = scale*mean[i]-parameters[i]+offset;
1046 : // deviation + normalisation + jeffrey
1047 : const double normalisation = -0.5*std::log(0.5/M_PI*inv_s2);
1048 : const double jeffreys = -0.5*std::log(2.*inv_sss);
1049 : ene += 0.5*dev*dev*inv_s2 + normalisation + jeffreys;
1050 : if(doscale_ || doregres_zero_) ene += jeffreys;
1051 : if(dooffset_) ene += jeffreys;
1052 : }
1053 : }
1054 152 : return kbt_ * ene;
1055 : }
1056 :
1057 12 : void Metainference::moveTilde(const std::vector<double> &mean_, double &old_energy)
1058 : {
1059 12 : std::vector<double> new_ftilde(sigma_.size());
1060 12 : new_ftilde = ftilde_;
1061 :
1062 : // change all tildes
1063 36 : for(unsigned j=0; j<sigma_.size(); j++) {
1064 24 : const double r3 = random[0].Gaussian();
1065 24 : const double ds3 = Dftilde_*std::sqrt(sigma_mean2_[j])*r3;
1066 24 : new_ftilde[j] = ftilde_[j] + ds3;
1067 : }
1068 : // calculate new energy
1069 12 : double new_energy = getEnergyMIGEN(mean_,new_ftilde,sigma_,scale_,offset_);
1070 :
1071 : // accept or reject
1072 12 : const double delta = ( new_energy - old_energy ) / kbt_;
1073 : // if delta is negative always accept move
1074 12 : if( delta <= 0.0 ) {
1075 12 : old_energy = new_energy;
1076 12 : ftilde_ = new_ftilde;
1077 12 : MCacceptFT_++;
1078 : // otherwise extract random number
1079 : } else {
1080 0 : const double s = random[0].RandU01();
1081 0 : if( s < std::exp(-delta) ) {
1082 0 : old_energy = new_energy;
1083 0 : ftilde_ = new_ftilde;
1084 0 : MCacceptFT_++;
1085 : }
1086 : }
1087 12 : }
1088 :
1089 168 : void Metainference::moveScaleOffset(const std::vector<double> &mean_, double &old_energy)
1090 : {
1091 168 : double new_scale = scale_;
1092 :
1093 168 : if(doscale_) {
1094 144 : if(scale_prior_==SC_FLAT) {
1095 144 : const double r1 = random[1].Gaussian();
1096 144 : const double ds1 = Dscale_*r1;
1097 144 : new_scale += ds1;
1098 : // check boundaries
1099 144 : if(new_scale > scale_max_) {new_scale = 2.0 * scale_max_ - new_scale;}
1100 144 : if(new_scale < scale_min_) {new_scale = 2.0 * scale_min_ - new_scale;}
1101 : } else {
1102 0 : const double r1 = random[1].Gaussian();
1103 0 : const double ds1 = 0.5*(scale_mu_-new_scale)+Dscale_*std::exp(1)/M_PI*r1;
1104 0 : new_scale += ds1;
1105 : }
1106 : }
1107 :
1108 168 : double new_offset = offset_;
1109 :
1110 168 : if(dooffset_) {
1111 24 : if(offset_prior_==SC_FLAT) {
1112 24 : const double r1 = random[1].Gaussian();
1113 24 : const double ds1 = Doffset_*r1;
1114 24 : new_offset += ds1;
1115 : // check boundaries
1116 24 : if(new_offset > offset_max_) {new_offset = 2.0 * offset_max_ - new_offset;}
1117 24 : if(new_offset < offset_min_) {new_offset = 2.0 * offset_min_ - new_offset;}
1118 : } else {
1119 0 : const double r1 = random[1].Gaussian();
1120 0 : const double ds1 = 0.5*(offset_mu_-new_offset)+Doffset_*std::exp(1)/M_PI*r1;
1121 0 : new_offset += ds1;
1122 : }
1123 : }
1124 :
1125 : // calculate new energy
1126 : double new_energy = 0.;
1127 :
1128 168 : switch(noise_type_) {
1129 12 : case GAUSS:
1130 12 : new_energy = getEnergyGJ(mean_,sigma_,new_scale,new_offset);
1131 : break;
1132 48 : case MGAUSS:
1133 48 : new_energy = getEnergyGJE(mean_,sigma_,new_scale,new_offset);
1134 : break;
1135 48 : case OUTLIERS:
1136 48 : new_energy = getEnergySP(mean_,sigma_,new_scale,new_offset);
1137 : break;
1138 48 : case MOUTLIERS:
1139 48 : new_energy = getEnergySPE(mean_,sigma_,new_scale,new_offset);
1140 : break;
1141 12 : case GENERIC:
1142 12 : new_energy = getEnergyMIGEN(mean_,ftilde_,sigma_,new_scale,new_offset);
1143 : break;
1144 : }
1145 :
1146 : // for the scale/offset we need to consider the total energy
1147 168 : std::vector<double> totenergies(2);
1148 168 : if(master) {
1149 96 : totenergies[0] = old_energy;
1150 96 : totenergies[1] = new_energy;
1151 96 : if(nrep_>1) multi_sim_comm.Sum(totenergies);
1152 : } else {
1153 72 : totenergies[0] = 0;
1154 72 : totenergies[1] = 0;
1155 : }
1156 168 : comm.Sum(totenergies);
1157 :
1158 : // accept or reject
1159 168 : const double delta = ( totenergies[1] - totenergies[0] ) / kbt_;
1160 : // if delta is negative always accept move
1161 168 : if( delta <= 0.0 ) {
1162 168 : old_energy = new_energy;
1163 168 : scale_ = new_scale;
1164 168 : offset_ = new_offset;
1165 168 : MCacceptScale_++;
1166 : // otherwise extract random number
1167 : } else {
1168 0 : double s = random[1].RandU01();
1169 0 : if( s < std::exp(-delta) ) {
1170 0 : old_energy = new_energy;
1171 0 : scale_ = new_scale;
1172 0 : offset_ = new_offset;
1173 0 : MCacceptScale_++;
1174 : }
1175 : }
1176 168 : }
1177 :
1178 178 : void Metainference::moveSigmas(const std::vector<double> &mean_, double &old_energy, const unsigned i, const std::vector<unsigned> &indices, bool &breaknow)
1179 : {
1180 178 : std::vector<double> new_sigma(sigma_.size());
1181 178 : new_sigma = sigma_;
1182 :
1183 : // change MCchunksize_ sigmas
1184 178 : if (MCchunksize_ > 0) {
1185 0 : if ((MCchunksize_ * i) >= sigma_.size()) {
1186 : // This means we are not moving any sigma, so we should break immediately
1187 0 : breaknow = true;
1188 : }
1189 :
1190 : // change random sigmas
1191 0 : for(unsigned j=0; j<MCchunksize_; j++) {
1192 0 : const unsigned shuffle_index = j + MCchunksize_ * i;
1193 0 : if (shuffle_index >= sigma_.size()) {
1194 : // Going any further will segfault but we should still evaluate the sigmas we changed
1195 : break;
1196 : }
1197 0 : const unsigned index = indices[shuffle_index];
1198 0 : const double r2 = random[0].Gaussian();
1199 0 : const double ds2 = Dsigma_[index]*r2;
1200 0 : new_sigma[index] = sigma_[index] + ds2;
1201 : // check boundaries
1202 0 : if(new_sigma[index] > sigma_max_[index]) {new_sigma[index] = 2.0 * sigma_max_[index] - new_sigma[index];}
1203 0 : if(new_sigma[index] < sigma_min_[index]) {new_sigma[index] = 2.0 * sigma_min_[index] - new_sigma[index];}
1204 : }
1205 : } else {
1206 : // change all sigmas
1207 3008 : for(unsigned j=0; j<sigma_.size(); j++) {
1208 2830 : const double r2 = random[0].Gaussian();
1209 2830 : const double ds2 = Dsigma_[j]*r2;
1210 2830 : new_sigma[j] = sigma_[j] + ds2;
1211 : // check boundaries
1212 2830 : if(new_sigma[j] > sigma_max_[j]) {new_sigma[j] = 2.0 * sigma_max_[j] - new_sigma[j];}
1213 2830 : if(new_sigma[j] < sigma_min_[j]) {new_sigma[j] = 2.0 * sigma_min_[j] - new_sigma[j];}
1214 : }
1215 : }
1216 :
1217 178 : if (breaknow) {
1218 : // We didn't move any sigmas, so there is no sense in evaluating anything
1219 : return;
1220 : }
1221 :
1222 : // calculate new energy
1223 : double new_energy = 0.;
1224 178 : switch(noise_type_) {
1225 12 : case GAUSS:
1226 12 : new_energy = getEnergyGJ(mean_,new_sigma,scale_,offset_);
1227 : break;
1228 52 : case MGAUSS:
1229 52 : new_energy = getEnergyGJE(mean_,new_sigma,scale_,offset_);
1230 : break;
1231 54 : case OUTLIERS:
1232 54 : new_energy = getEnergySP(mean_,new_sigma,scale_,offset_);
1233 : break;
1234 48 : case MOUTLIERS:
1235 48 : new_energy = getEnergySPE(mean_,new_sigma,scale_,offset_);
1236 : break;
1237 12 : case GENERIC:
1238 12 : new_energy = getEnergyMIGEN(mean_,ftilde_,new_sigma,scale_,offset_);
1239 : break;
1240 : }
1241 :
1242 : // accept or reject
1243 178 : const double delta = ( new_energy - old_energy ) / kbt_;
1244 : // if delta is negative always accept move
1245 178 : if( delta <= 0.0 ) {
1246 178 : old_energy = new_energy;
1247 178 : sigma_ = new_sigma;
1248 178 : MCaccept_++;
1249 : // otherwise extract random number
1250 : } else {
1251 0 : const double s = random[0].RandU01();
1252 0 : if( s < std::exp(-delta) ) {
1253 0 : old_energy = new_energy;
1254 0 : sigma_ = new_sigma;
1255 0 : MCaccept_++;
1256 : }
1257 : }
1258 : }
1259 :
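 : /* doMonteCarlo: one Metropolis pass over the noise parameters. Each MC step
 :    (i) moves ftilde (GENERIC noise only), (ii) moves scale/offset using the
 :    energy summed over all replicas, so that the move is accepted collectively,
 :    and (iii) moves the sigmas. The per-replica energies are then summed to give
 :    the total metainference score returned as the bias. */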
1260 178 : double Metainference::doMonteCarlo(const std::vector<double> &mean_)
1261 : {
1262 : // calculate old energy with the updated coordinates
1263 178 : double old_energy=0.;
1264 :
1265 178 : switch(noise_type_) {
1266 12 : case GAUSS:
1267 12 : old_energy = getEnergyGJ(mean_,sigma_,scale_,offset_);
1268 12 : break;
1269 52 : case MGAUSS:
1270 52 : old_energy = getEnergyGJE(mean_,sigma_,scale_,offset_);
1271 52 : break;
1272 54 : case OUTLIERS:
1273 54 : old_energy = getEnergySP(mean_,sigma_,scale_,offset_);
1274 54 : break;
1275 48 : case MOUTLIERS:
1276 48 : old_energy = getEnergySPE(mean_,sigma_,scale_,offset_);
1277 48 : break;
1278 12 : case GENERIC:
1279 12 : old_energy = getEnergyMIGEN(mean_,ftilde_,sigma_,scale_,offset_);
1280 12 : break;
1281 : }
1282 :
1283 : // do not run MC if this is a replica-exchange trial
1284 178 : if(!getExchangeStep()) {
1285 :
1286 : // Create std::vector of random sigma indices
1287 : std::vector<unsigned> indices;
1288 178 : if (MCchunksize_ > 0) {
1289 0 : for (unsigned j=0; j<sigma_.size(); j++) {
1290 0 : indices.push_back(j);
1291 : }
1292 0 : random[2].Shuffle(indices);
1293 : }
1294 178 : bool breaknow = false;
1295 :
1296 : // cycle on MC steps
1297 356 : for(unsigned i=0; i<MCsteps_; ++i) {
1298 178 : MCtrial_++;
1299 : // propose move for ftilde
1300 178 : if(noise_type_==GENERIC) moveTilde(mean_, old_energy);
1301 : // propose move for scale and/or offset
1302 178 : if(doscale_||dooffset_) moveScaleOffset(mean_, old_energy);
1303 : // propose move for sigma
1304 178 : moveSigmas(mean_, old_energy, i, indices, breaknow);
1305 : // exit from the loop if this is the case
1306 178 : if(breaknow) break;
1307 : }
1308 :
1309 : /* save the result of the sampling */
1310 : /* ftilde */
1311 178 : if(noise_type_==GENERIC) {
1312 12 : double accept = static_cast<double>(MCacceptFT_) / static_cast<double>(MCtrial_);
1313 12 : valueAcceptFT->set(accept);
1314 36 : for(unsigned i=0; i<sigma_.size(); i++) valueFtilde[i]->set(ftilde_[i]);
1315 : }
1316 : /* scale and offset */
1317 178 : if(doscale_ || doregres_zero_) valueScale->set(scale_);
1318 178 : if(dooffset_) valueOffset->set(offset_);
1319 178 : if(doscale_||dooffset_) {
1320 168 : double accept = static_cast<double>(MCacceptScale_) / static_cast<double>(MCtrial_);
1321 168 : valueAcceptScale->set(accept);
1322 : }
1323 : /* sigmas */
1324 3008 : for(unsigned i=0; i<sigma_.size(); i++) valueSigma[i]->set(sigma_[i]);
1325 178 : double accept = static_cast<double>(MCaccept_) / static_cast<double>(MCtrial_);
1326 178 : valueAccept->set(accept);
1327 : }
1328 :
1329 : // here we sum the score over the replicas to get the full metainference score that we save as a bias
1330 178 : if(master) {
1331 104 : if(nrep_>1) multi_sim_comm.Sum(old_energy);
1332 : } else {
1333 74 : old_energy=0;
1334 : }
1335 178 : comm.Sum(old_energy);
1336 :
1337 : // this is the energy with current coordinates and parameters
1338 178 : return old_energy;
1339 : }
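// The reduction above follows the two-level pattern used throughout this file
// (a sketch, assuming the usual PLUMED communicators): multi_sim_comm connects
// one master rank per replica, while comm spans the ranks inside a replica.
// Zeroing the value on non-master ranks first makes the final comm.Sum act as
// a broadcast of the inter-replica total:
//
//   double total = local_value;
//   if(master) { if(nrep_ > 1) multi_sim_comm.Sum(total); }
//   else       { total = 0.0; }
//   comm.Sum(total); // every rank now holds the replica-summed value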
1340 :
1341 : /*
1342 : In the following energy-force functions we do not add the normalisation and the Jeffreys
1343 : priors because they do not contribute to the forces; the correct Metainference energy is
1344 : the one calculated in the Monte Carlo step.
1345 : */
1346 :
1347 54 : void Metainference::getEnergyForceSP(const std::vector<double> &mean, const std::vector<double> &dmean_x,
1348 : const std::vector<double> &dmean_b)
1349 : {
1350 54 : const double scale2 = scale_*scale_;
1351 54 : const double sm2 = sigma_mean2_[0];
1352 54 : const double ss2 = sigma_[0]*sigma_[0] + scale2*sm2;
1353 54 : std::vector<double> f(narg,0);
1354 :
1355 54 : if(master) {
1356 30 : #pragma omp parallel num_threads(OpenMP::getNumThreads())
1357 : {
1358 : #pragma omp for
1359 : for(unsigned i=0; i<narg; ++i) {
1360 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1361 : const double a2 = 0.5*dev*dev + ss2;
1362 : if(sm2 > 0.0) {
1363 : const double t = std::exp(-a2/sm2);
1364 : const double dt = 1./t;
1365 : const double dit = 1./(1.-dt);
1366 : f[i] = -scale_*dev*(dit/sm2 + 1./a2);
1367 : }
1368 : else {
1369 : f[i] = -scale_*dev*(1./a2);
1370 : }
1371 : }
1372 : }
1373 : // sum the per-data-point force prefactors over the replicas
1374 30 : if(nrep_>1) multi_sim_comm.Sum(&f[0],narg);
1375 : }
1376 : // intra-replica summation
1377 54 : comm.Sum(&f[0],narg);
1378 :
1379 : double w_tmp = 0.;
1380 288 : for(unsigned i=0; i<narg; ++i) {
1381 234 : setOutputForce(i, kbt_*f[i]*dmean_x[i]);
1382 234 : w_tmp += kbt_*f[i]*dmean_b[i];
1383 : }
1384 :
1385 54 : if(do_reweight_) {
1386 48 : setOutputForce(narg, w_tmp);
1387 96 : getPntrToComponent("biasDer")->set(-w_tmp);
1388 : }
1389 54 : }
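// Where the force prefactor in getEnergyForceSP comes from (a sketch based on
// the long-tailed OUTLIERS marginal of Metainference): with
//   p(d|x) ∝ (1 - exp(-a2/sm2)) / a2,   a2 = 0.5*dev^2 + ss2,
// the reduced energy is E/kbt = -log p, so that, writing q = a2/sm2,
//   -d(E/kbt)/d(mean) = scale*dev*( exp(-q)/((1-exp(-q))*sm2) - 1/a2 ).
// The code stores this in f[i] through dit = 1/(1-exp(q)), which equals
// -exp(-q)/(1-exp(-q)); the else branch is the sm2 -> 0 limit of the same
// expression.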
1390 :
1391 48 : void Metainference::getEnergyForceSPE(const std::vector<double> &mean, const std::vector<double> &dmean_x,
1392 : const std::vector<double> &dmean_b)
1393 : {
1394 48 : const double scale2 = scale_*scale_;
1395 48 : std::vector<double> f(narg,0);
1396 :
1397 48 : if(master) {
1398 24 : #pragma omp parallel num_threads(OpenMP::getNumThreads())
1399 : {
1400 : #pragma omp for
1401 : for(unsigned i=0; i<narg; ++i) {
1402 : const double sm2 = sigma_mean2_[i];
1403 : const double ss2 = sigma_[i]*sigma_[i] + scale2*sm2;
1404 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1405 : const double a2 = 0.5*dev*dev + ss2;
1406 : if(sm2 > 0.0) {
1407 : const double t = std::exp(-a2/sm2);
1408 : const double dt = 1./t;
1409 : const double dit = 1./(1.-dt);
1410 : f[i] = -scale_*dev*(dit/sm2 + 1./a2);
1411 : }
1412 : else {
1413 : f[i] = -scale_*dev*(1./a2);
1414 : }
1415 : }
1416 : }
1417 : // sum the per-data-point force prefactors over the replicas
1418 24 : if(nrep_>1) multi_sim_comm.Sum(&f[0],narg);
1419 : }
1420 48 : comm.Sum(&f[0],narg);
1421 :
1422 : double w_tmp = 0.;
1423 240 : for(unsigned i=0; i<narg; ++i) {
1424 192 : setOutputForce(i, kbt_ * dmean_x[i] * f[i]);
1425 192 : w_tmp += kbt_ * dmean_b[i] *f[i];
1426 : }
1427 :
1428 48 : if(do_reweight_) {
1429 48 : setOutputForce(narg, w_tmp);
1430 96 : getPntrToComponent("biasDer")->set(-w_tmp);
1431 : }
1432 48 : }
1433 :
1434 12 : void Metainference::getEnergyForceGJ(const std::vector<double> &mean, const std::vector<double> &dmean_x,
1435 : const std::vector<double> &dmean_b)
1436 : {
1437 12 : const double scale2 = scale_*scale_;
1438 12 : double inv_s2=0.;
1439 :
1440 12 : if(master) {
1441 12 : inv_s2 = 1./(sigma_[0]*sigma_[0] + scale2*sigma_mean2_[0]);
1442 12 : if(nrep_>1) multi_sim_comm.Sum(inv_s2);
1443 : }
1444 12 : comm.Sum(inv_s2);
1445 :
1446 : double w_tmp = 0.;
1447 12 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(w_tmp)
1448 : {
1449 : #pragma omp for reduction( + : w_tmp)
1450 : for(unsigned i=0; i<narg; ++i) {
1451 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1452 : const double mult = dev*scale_*inv_s2;
1453 : setOutputForce(i, -kbt_*dmean_x[i]*mult);
1454 : w_tmp += kbt_*dmean_b[i]*mult;
1455 : }
1456 : }
1457 :
1458 12 : if(do_reweight_) {
1459 0 : setOutputForce(narg, -w_tmp);
1460 0 : getPntrToComponent("biasDer")->set(w_tmp);
1461 : }
1462 12 : }
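// For the Gaussian models (GJ here, GJE per data point below) the marginal
// energy carries, besides sigma-dependent terms with no force on the
// arguments, a term E_i/kbt = 0.5*dev_i^2*inv_s2, so that
//   d(E_i/kbt)/d(mean_i) = scale*dev_i*inv_s2 = mult,
// which the loop applies through the chain rule with dmean_x (force on the
// arguments) and dmean_b (derivative with respect to the bias).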
1463 :
1464 52 : void Metainference::getEnergyForceGJE(const std::vector<double> &mean, const std::vector<double> &dmean_x,
1465 : const std::vector<double> &dmean_b)
1466 : {
1467 52 : const double scale2 = scale_*scale_;
1468 52 : std::vector<double> inv_s2(sigma_.size(),0.);
1469 :
1470 52 : if(master) {
1471 1300 : for(unsigned i=0; i<sigma_.size(); ++i) inv_s2[i] = 1./(sigma_[i]*sigma_[i] + scale2*sigma_mean2_[i]);
1472 26 : if(nrep_>1) multi_sim_comm.Sum(&inv_s2[0],sigma_.size());
1473 : }
1474 52 : comm.Sum(&inv_s2[0],sigma_.size());
1475 :
1476 : double w_tmp = 0.;
1477 52 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(w_tmp)
1478 : {
1479 : #pragma omp for reduction( + : w_tmp)
1480 : for(unsigned i=0; i<narg; ++i) {
1481 : const double dev = scale_*mean[i]-parameters[i]+offset_;
1482 : const double mult = dev*scale_*inv_s2[i];
1483 : setOutputForce(i, -kbt_*dmean_x[i]*mult);
1484 : w_tmp += kbt_*dmean_b[i]*mult;
1485 : }
1486 : }
1487 :
1488 52 : if(do_reweight_) {
1489 52 : setOutputForce(narg, -w_tmp);
1490 104 : getPntrToComponent("biasDer")->set(w_tmp);
1491 : }
1492 52 : }
1493 :
1494 12 : void Metainference::getEnergyForceMIGEN(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b)
1495 : {
1496 12 : std::vector<double> inv_s2(sigma_.size(),0.);
1497 12 : std::vector<double> dev(sigma_.size(),0.);
1498 12 : std::vector<double> dev2(sigma_.size(),0.);
1499 :
1500 36 : for(unsigned i=0; i<sigma_.size(); ++i) {
1501 24 : inv_s2[i] = 1./sigma_mean2_[i];
1502 24 : if(master) {
1503 24 : dev[i] = (mean[i]-ftilde_[i]);
1504 24 : dev2[i] = dev[i]*dev[i];
1505 : }
1506 : }
1507 12 : if(master&&nrep_>1) {
1508 0 : multi_sim_comm.Sum(&dev[0],dev.size());
1509 0 : multi_sim_comm.Sum(&dev2[0],dev2.size());
1510 : }
1511 12 : comm.Sum(&dev[0],dev.size());
1512 12 : comm.Sum(&dev2[0],dev2.size());
1513 :
1514 : double dene_b = 0.;
1515 12 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(dene_b)
1516 : {
1517 : #pragma omp for reduction( + : dene_b)
1518 : for(unsigned i=0; i<narg; ++i) {
1519 : const double dene_x = kbt_*inv_s2[i]*dmean_x[i]*dev[i];
1520 : dene_b += kbt_*inv_s2[i]*dmean_b[i]*dev[i];
1521 : setOutputForce(i, -dene_x);
1522 : }
1523 : }
1524 :
1525 12 : if(do_reweight_) {
1526 0 : setOutputForce(narg, -dene_b);
1527 0 : getPntrToComponent("biasDer")->set(dene_b);
1528 : }
1529 12 : }
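// In the GENERIC model the arguments only couple to the auxiliary means
// ftilde through a Gaussian of precision 1/sigma_mean2, hence the force on
// argument i reduces to kbt*(mean[i]-ftilde[i])/sigma_mean2[i] times the
// chain-rule factors; the comparison of ftilde with the experimental data
// enters the Monte Carlo energy (getEnergyMIGEN) but produces no direct
// force here.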
1530 :
1531 178 : void Metainference::get_weights(const unsigned iselect, double &weight, double &norm, double &neff)
1532 : {
1533 178 : const double dnrep = static_cast<double>(nrep_);
1534 : // calculate the weights either from BIAS
1535 178 : if(do_reweight_) {
1536 148 : std::vector<double> bias(nrep_,0);
1537 148 : if(master) {
1538 74 : bias[replica_] = getArgument(narg);
1539 74 : if(nrep_>1) multi_sim_comm.Sum(&bias[0], nrep_);
1540 : }
1541 148 : comm.Sum(&bias[0], nrep_);
1542 :
1543 : // accumulate weights
1544 148 : const double decay = 1./static_cast<double> (average_weights_stride_);
1545 148 : if(!firstTimeW[iselect]) {
1546 408 : for(unsigned i=0; i<nrep_; ++i) {
1547 272 : const double delta=bias[i]-average_weights_[iselect][i];
1548 272 : average_weights_[iselect][i]+=decay*delta;
1549 : }
1550 : } else {
1551 : firstTimeW[iselect] = false;
1552 36 : for(unsigned i=0; i<nrep_; ++i) average_weights_[iselect][i] = bias[i];
1553 : }
1554 :
1555 : // exponentiate the time-averaged biases into weights, shifting by the maximum for numerical stability
1556 148 : const double maxbias = *(std::max_element(average_weights_[iselect].begin(), average_weights_[iselect].end()));
1557 444 : for(unsigned i=0; i<nrep_; ++i) bias[i] = std::exp((average_weights_[iselect][i]-maxbias)/kbt_);
1558 : // set the local weight, then accumulate the norm and the sum of squared weights
1559 148 : weight = bias[replica_];
1560 : double w2=0.;
1561 444 : for(unsigned i=0; i<nrep_; ++i) {
1562 296 : w2 += bias[i]*bias[i];
1563 296 : norm += bias[i];
1564 : }
1565 148 : neff = norm*norm/w2;
1566 296 : getPntrToComponent("weight")->set(weight/norm);
1567 : } else {
1568 : // or arithmetic ones
1569 30 : neff = dnrep;
1570 30 : weight = 1.0;
1571 30 : norm = dnrep;
1572 : }
1573 178 : getPntrToComponent("neff")->set(neff);
1574 178 : }
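// A small numerical check of the quantities computed above (assuming two
// replicas with time-averaged biases Vbar = {0, kbt*ln 2}):
//   w_i  = exp((Vbar_i - max Vbar)/kbt)  -> {0.5, 1.0}
//   norm = sum_i w_i                     -> 1.5
//   neff = norm^2 / sum_i w_i^2          -> 2.25/1.25 = 1.8
// neff is the Kish effective sample size: it equals nrep_ for uniform
// weights and approaches 1 when a single replica dominates.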
1575 :
1576 178 : void Metainference::get_sigma_mean(const unsigned iselect, const double weight, const double norm, const double neff, const std::vector<double> &mean)
1577 : {
1578 178 : const double dnrep = static_cast<double>(nrep_);
1579 178 : std::vector<double> sigma_mean2_tmp(sigma_mean2_.size(), 0.);
1580 :
1581 178 : if(do_optsigmamean_>0) {
1582 : // remove the first entry of the history vector
1583 0 : if(sigma_mean2_last_[iselect][0].size()==optsigmamean_stride_&&optsigmamean_stride_>0)
1584 0 : for(unsigned i=0; i<narg; ++i) sigma_mean2_last_[iselect][i].erase(sigma_mean2_last_[iselect][i].begin());
1585 : /* this is the current estimate of sigma mean for each argument;
1586 : there is one per argument in any case because the maximum among
1587 : them is used in the case of GAUSS/OUTLIERS */
1588 0 : std::vector<double> sigma_mean2_now(narg,0);
1589 0 : if(master) {
1590 0 : for(unsigned i=0; i<narg; ++i) sigma_mean2_now[i] = weight*(getArgument(i)-mean[i])*(getArgument(i)-mean[i]);
1591 0 : if(nrep_>1) multi_sim_comm.Sum(&sigma_mean2_now[0], narg);
1592 : }
1593 0 : comm.Sum(&sigma_mean2_now[0], narg);
1594 0 : for(unsigned i=0; i<narg; ++i) sigma_mean2_now[i] *= 1.0/(neff-1.)/norm;
1595 :
1596 : // add sigma_mean2 to history
1597 0 : if(optsigmamean_stride_>0) {
1598 0 : for(unsigned i=0; i<narg; ++i) sigma_mean2_last_[iselect][i].push_back(sigma_mean2_now[i]);
1599 : } else {
1600 0 : for(unsigned i=0; i<narg; ++i) if(sigma_mean2_now[i] > sigma_mean2_last_[iselect][i][0]) sigma_mean2_last_[iselect][i][0] = sigma_mean2_now[i];
1601 : }
1602 :
1603 0 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
1604 0 : for(unsigned i=0; i<narg; ++i) {
1605 : /* set to the maximum in the history vector */
1606 0 : sigma_mean2_tmp[i] = *max_element(sigma_mean2_last_[iselect][i].begin(), sigma_mean2_last_[iselect][i].end());
1607 : /* the standard error of the mean */
1608 0 : valueSigmaMean[i]->set(std::sqrt(sigma_mean2_tmp[i]));
1609 0 : if(noise_type_==GENERIC) {
1610 0 : sigma_min_[i] = std::sqrt(sigma_mean2_tmp[i]);
1611 0 : if(sigma_[i] < sigma_min_[i]) sigma_[i] = sigma_min_[i];
1612 : }
1613 : }
1614 0 : } else if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
1615 : // find maximum for each data point
1616 : std::vector <double> max_values;
1617 0 : for(unsigned i=0; i<narg; ++i) max_values.push_back(*max_element(sigma_mean2_last_[iselect][i].begin(), sigma_mean2_last_[iselect][i].end()));
1618 : // find maximum across data points
1619 0 : const double max_now = *max_element(max_values.begin(), max_values.end());
1620 : // set new value
1621 0 : sigma_mean2_tmp[0] = max_now;
1622 0 : valueSigmaMean[0]->set(std::sqrt(sigma_mean2_tmp[0]));
1623 : }
1624 : // endif sigma mean optimization
1625 : // start sigma max optimization
1626 0 : if(do_optsigmamean_>1&&!sigmamax_opt_done_) {
1627 0 : for(unsigned i=0; i<sigma_max_.size(); i++) {
1628 0 : if(sigma_max_est_[i]<sigma_mean2_tmp[i]&&optimized_step_>optsigmamean_stride_) sigma_max_est_[i]=sigma_mean2_tmp[i];
1629 : // ready to set once and for all the value of sigma_max
1630 0 : if(optimized_step_==N_optimized_step_) {
1631 0 : sigmamax_opt_done_=true;
1632 0 : for(unsigned i=0; i<sigma_max_.size(); i++) {
1633 0 : sigma_max_[i]=std::sqrt(sigma_max_est_[i]*dnrep);
1634 0 : Dsigma_[i] = 0.05*(sigma_max_[i] - sigma_min_[i]);
1635 0 : if(sigma_[i]>sigma_max_[i]) sigma_[i]=sigma_max_[i];
1636 : }
1637 : }
1638 : }
1639 0 : optimized_step_++;
1640 : }
1641 : // end sigma max optimization
1642 : } else {
1643 178 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
1644 2876 : for(unsigned i=0; i<narg; ++i) {
1645 2764 : sigma_mean2_tmp[i] = sigma_mean2_last_[iselect][i][0];
1646 2764 : valueSigmaMean[i]->set(std::sqrt(sigma_mean2_tmp[i]));
1647 : }
1648 66 : } else if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
1649 66 : sigma_mean2_tmp[0] = sigma_mean2_last_[iselect][0][0];
1650 66 : valueSigmaMean[0]->set(std::sqrt(sigma_mean2_tmp[0]));
1651 : }
1652 : }
1653 :
1654 178 : sigma_mean2_ = sigma_mean2_tmp;
1655 178 : }
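// The estimator assembled above is the weighted standard error of the mean:
// with normalised weights p_r = w_r/norm over the replicas r,
//   sigma_mean2[i] = sum_r p_r*(x_r[i]-mean[i])^2 / (neff-1),
// i.e. the weighted sample variance divided by the effective number of
// independent replicas; for uniform weights it reduces to the familiar
// sum_r (x_r[i]-mean[i])^2 / (N*(N-1)).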
1656 :
1657 178 : void Metainference::replica_averaging(const double weight, const double norm, std::vector<double> &mean, std::vector<double> &dmean_b)
1658 : {
1659 178 : if(master) {
1660 1660 : for(unsigned i=0; i<narg; ++i) mean[i] = weight/norm*getArgument(i);
1661 104 : if(nrep_>1) multi_sim_comm.Sum(&mean[0], narg);
1662 : }
1663 178 : comm.Sum(&mean[0], narg);
1664 : // set the derivative of the mean with respect to the bias
1665 3200 : for(unsigned i=0; i<narg; ++i) dmean_b[i] = weight/norm/kbt_*(getArgument(i)-mean[i])/static_cast<double>(average_weights_stride_);
1666 :
1667 : // this is only for generic metainference
1668 178 : if(firstTime) {ftilde_ = mean; firstTime = false;}
1669 178 : }
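// Why dmean_b takes the form above (a sketch): with mean = sum_r w_r*x_r/norm
// and w_r = exp(Vbar_r/kbt), differentiating with respect to this replica's
// instantaneous bias V gives
//   d(mean)/dV = (w/norm)*(x - mean)/kbt * d(Vbar)/dV,
// and d(Vbar)/dV = 1/average_weights_stride_ because Vbar is updated as an
// exponential moving average with that decay constant.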
1670 :
1671 0 : void Metainference::do_regression_zero(const std::vector<double> &mean)
1672 : {
1673 : // parameters[i] = scale_ * mean[i]: find scale_ with linear regression
1674 : double num = 0.0;
1675 : double den = 0.0;
1676 0 : for(unsigned i=0; i<parameters.size(); ++i) {
1677 0 : num += mean[i] * parameters[i];
1678 0 : den += mean[i] * mean[i];
1679 : }
1680 0 : if(den>0) {
1681 0 : scale_ = num / den;
1682 : } else {
1683 0 : scale_ = 1.0;
1684 : }
1685 0 : }
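// A sketch of the estimator above: minimising
//   chi2(s) = sum_i ( parameters[i] - s*mean[i] )^2
// over the scale s gives d(chi2)/ds = 0 at
//   s = sum_i mean[i]*parameters[i] / sum_i mean[i]^2,
// i.e. ordinary least squares with the intercept fixed at zero; the den>0
// guard falls back to s = 1 when all mean[i] vanish.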
1686 :
1687 178 : void Metainference::calculate()
1688 : {
1689 : // get step
1690 178 : const long int step = getStep();
1691 :
1692 : unsigned iselect = 0;
1693 : // set the value of the selector for REM-like schemes
1694 178 : if(selector_.length()>0) iselect = static_cast<unsigned>(plumed.passMap[selector_]);
1695 :
1696 : /* 1) collect weights */
1697 178 : double weight = 0.;
1698 178 : double neff = 0.;
1699 178 : double norm = 0.;
1700 178 : get_weights(iselect, weight, norm, neff);
1701 :
1702 : /* 2) calculate average */
1703 178 : std::vector<double> mean(narg,0);
1704 : // this is the derivative of the mean with respect to the argument
1705 178 : std::vector<double> dmean_x(narg,weight/norm);
1706 : // this is the derivative of the mean with respect to the bias
1707 178 : std::vector<double> dmean_b(narg,0);
1708 : // calculate it
1709 178 : replica_averaging(weight, norm, mean, dmean_b);
1710 :
1711 : /* 3) calculate parameters */
1712 178 : get_sigma_mean(iselect, weight, norm, neff, mean);
1713 :
1714 : // in case of regression with zero intercept, calculate scale
1715 178 : if(doregres_zero_ && step%nregres_zero_==0) do_regression_zero(mean);
1716 :
1717 : /* 4) run Monte Carlo */
1718 178 : double ene = doMonteCarlo(mean);
1719 :
1720 : // calculate bias and forces
1721 178 : switch(noise_type_) {
1722 12 : case GAUSS:
1723 12 : getEnergyForceGJ(mean, dmean_x, dmean_b);
1724 : break;
1725 52 : case MGAUSS:
1726 52 : getEnergyForceGJE(mean, dmean_x, dmean_b);
1727 : break;
1728 54 : case OUTLIERS:
1729 54 : getEnergyForceSP(mean, dmean_x, dmean_b);
1730 : break;
1731 48 : case MOUTLIERS:
1732 48 : getEnergyForceSPE(mean, dmean_x, dmean_b);
1733 : break;
1734 12 : case GENERIC:
1735 12 : getEnergyForceMIGEN(mean, dmean_x, dmean_b);
1736 : break;
1737 : }
1738 :
1739 : setBias(ene);
1740 178 : }
1741 :
1742 19 : void Metainference::writeStatus()
1743 : {
1744 19 : sfile_.rewind();
1745 19 : sfile_.printField("time",getTimeStep()*getStep());
1746 : //nsel
1747 38 : for(unsigned i=0; i<sigma_mean2_last_.size(); i++) {
1748 : std::string msg_i,msg_j;
1749 19 : Tools::convert(i,msg_i);
1750 : std::vector <double> max_values;
1751 : //narg
1752 2434 : for(unsigned j=0; j<narg; ++j) {
1753 2415 : Tools::convert(j,msg_j);
1754 2415 : std::string msg = msg_i+"_"+msg_j;
1755 2415 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
1756 4780 : sfile_.printField("sigmaMean_"+msg,std::sqrt(*max_element(sigma_mean2_last_[i][j].begin(), sigma_mean2_last_[i][j].end())));
1757 : } else {
1758 : // find maximum for each data point
1759 25 : max_values.push_back(*max_element(sigma_mean2_last_[i][j].begin(), sigma_mean2_last_[i][j].end()));
1760 : }
1761 : }
1762 19 : if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
1763 : // find maximum across data points
1764 6 : const double max_now = std::sqrt(*max_element(max_values.begin(), max_values.end()));
1765 6 : Tools::convert(0,msg_j);
1766 6 : std::string msg = msg_i+"_"+msg_j;
1767 12 : sfile_.printField("sigmaMean_"+msg, max_now);
1768 : }
1769 : }
1770 2415 : for(unsigned i=0; i<sigma_.size(); ++i) {
1771 : std::string msg;
1772 2396 : Tools::convert(i,msg);
1773 4792 : sfile_.printField("sigma_"+msg,sigma_[i]);
1774 : }
1775 2415 : for(unsigned i=0; i<sigma_max_.size(); ++i) {
1776 : std::string msg;
1777 2396 : Tools::convert(i,msg);
1778 4792 : sfile_.printField("sigma_max_"+msg,sigma_max_[i]);
1779 : }
1780 19 : if(noise_type_==GENERIC) {
1781 3 : for(unsigned i=0; i<ftilde_.size(); ++i) {
1782 : std::string msg;
1783 2 : Tools::convert(i,msg);
1784 4 : sfile_.printField("ftilde_"+msg,ftilde_[i]);
1785 : }
1786 : }
1787 19 : sfile_.printField("scale0_",scale_);
1788 19 : sfile_.printField("offset0_",offset_);
1789 38 : for(unsigned i=0; i<average_weights_.size(); i++) {
1790 : std::string msg_i;
1791 19 : Tools::convert(i,msg_i);
1792 38 : sfile_.printField("weight_"+msg_i,average_weights_[i][replica_]);
1793 : }
1794 19 : sfile_.printField();
1795 19 : sfile_.flush();
1796 19 : }
1797 :
1798 178 : void Metainference::update() {
1799 : // write status file
1800 178 : if(write_stride_>0&& (getStep()%write_stride_==0 || getCPT()) ) writeStatus();
1801 178 : }
1802 :
1803 : }
1804 : }
1805 :