Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 :
23 : #include "bias/Bias.h"
24 : #include "core/ActionRegister.h"
25 : #include "core/PlumedMain.h"
26 : #include "core/Value.h"
27 : #include "tools/File.h"
28 : #include "tools/OpenMP.h"
29 : #include "tools/Random.h"
30 : #include "tools/Communicator.h"
31 : #include <chrono>
32 : #include <numeric>
33 :
34 : #ifndef M_PI
35 : #define M_PI 3.14159265358979323846
36 : #endif
37 :
38 : namespace PLMD {
39 : namespace isdb {
40 :
41 : //+PLUMEDOC ISDB_BIAS METAINFERENCE
42 : /*
43 : Calculates the Metainference energy for a set of experimental data.
44 :
45 : Metainference \cite Bonomi:2016ip is a Bayesian framework
46 : to model heterogeneous systems by integrating prior information with noisy, ensemble-averaged data.
47 : Metainference models a system and quantifies the level of noise in the data by considering a set of replicas of the system.
48 :
49 : Calculated experimental data are given in input as ARG while reference experimental values
50 : can be given either from fixed components of other actions using PARARG or as numbers using
51 : PARAMETERS. The default behavior is that of averaging the data over the available replicas,
if this is not wanted, the keyword NOENSEMBLE prevents this averaging.
53 :
54 : Metadynamics Metainference \cite Bonomi:2016ge or more in general biased Metainference requires the knowledge of
55 : biasing potential in order to calculate the weighted average. In this case the value of the bias
56 : can be provided as the last argument in ARG and adding the keyword REWEIGHT. To avoid the noise
57 : resulting from the instantaneous value of the bias the weight of each replica can be averaged
over a given time using the keyword AVERAGING.
59 :
60 : The data can be averaged by using multiple replicas and weighted for a bias if present.
61 : The functional form of Metainference can be chosen among four variants selected
with NOISETYPE=GAUSS,MGAUSS,OUTLIERS,MOUTLIERS,GENERIC which correspond to modelling the noise for
63 : the arguments as a single gaussian common to all the data points, a gaussian per data
point, a single long-tailed gaussian common to all the data points, a long-tailed
65 : gaussian per data point or using two distinct noises as for the most general formulation of Metainference.
66 : In this latter case the noise of the replica-averaging is gaussian (one per data point) and the noise for
the comparison with the experimental data can be chosen using the keyword LIKELIHOOD
68 : between gaussian or log-normal (one per data point), furthermore the evolution of the estimated average
69 : over an infinite number of replicas is driven by DFTILDE.
70 :
As for Metainference theory there are two sigma values: SIGMA_MEAN0 represents the
error of calculating an average quantity using a finite set of replicas and should
be set as small as possible following the guidelines for replica-averaged simulations
74 : in the framework of the Maximum Entropy Principle. Alternatively, this can be obtained
75 : automatically using the internal sigma mean optimization as introduced in \cite Lohr:2017gc
76 : (OPTSIGMAMEAN=SEM), in this second case sigma_mean is estimated from the maximum standard error
77 : of the mean either over the simulation or over a defined time using the keyword AVERAGING.
78 : SIGMA_BIAS is an uncertainty parameter, sampled by a MC algorithm in the bounded interval
79 : defined by SIGMA_MIN and SIGMA_MAX. The initial value is set at SIGMA0. The MC move is a
random displacement of maximum value equal to DSIGMA. If the number of data points is
81 : too large and the acceptance rate drops it is possible to make the MC move over mutually
82 : exclusive, random subset of size MC_CHUNKSIZE and run more than one move setting MC_STEPS
83 : in such a way that MC_CHUNKSIZE*MC_STEPS will cover all the data points.
84 :
85 : Calculated and experimental data can be compared modulo a scaling factor and/or an offset
86 : using SCALEDATA and/or ADDOFFSET, the sampling is obtained by a MC algorithm either using
87 : a flat or a gaussian prior setting it with SCALE_PRIOR or OFFSET_PRIOR.
88 :
89 : \par Examples
90 :
91 : In the following example we calculate a set of \ref RDC, take the replica-average of
them and compare them with a set of experimental values. RDCs are compared with
93 : the experimental data but for a multiplication factor SCALE that is also sampled by
94 : MC on-the-fly
95 :
96 : \plumedfile
97 : RDC ...
98 : LABEL=rdc
99 : SCALE=0.0001
100 : GYROM=-72.5388
101 : ATOMS1=22,23
102 : ATOMS2=25,27
103 : ATOMS3=29,31
104 : ATOMS4=33,34
105 : ... RDC
106 :
107 : METAINFERENCE ...
108 : ARG=rdc.*
109 : NOISETYPE=MGAUSS
110 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
111 : SCALEDATA SCALE0=1 SCALE_MIN=0.1 SCALE_MAX=3 DSCALE=0.01
112 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
113 : SIGMA_MEAN0=0.001
114 : LABEL=spe
115 : ... METAINFERENCE
116 :
117 : PRINT ARG=spe.bias FILE=BIAS STRIDE=1
118 : \endplumedfile
119 :
120 : in the following example instead of using one uncertainty parameter per data point we use
a single uncertainty value in a long-tailed gaussian to take outliers into account; furthermore
122 : the data are weighted for the bias applied to other variables of the system.
123 :
124 : \plumedfile
125 : RDC ...
126 : LABEL=rdc
127 : SCALE=0.0001
128 : GYROM=-72.5388
129 : ATOMS1=22,23
130 : ATOMS2=25,27
131 : ATOMS3=29,31
132 : ATOMS4=33,34
133 : ... RDC
134 :
135 : cv1: TORSION ATOMS=1,2,3,4
136 : cv2: TORSION ATOMS=2,3,4,5
137 : mm: METAD ARG=cv1,cv2 HEIGHT=0.5 SIGMA=0.3,0.3 PACE=200 BIASFACTOR=8 WALKERS_MPI
138 :
139 : METAINFERENCE ...
140 : #SETTINGS NREPLICAS=2
141 : ARG=rdc.*,mm.bias
142 : REWEIGHT
143 : NOISETYPE=OUTLIERS
144 : PARAMETERS=1.9190,2.9190,3.9190,4.9190
145 : SCALEDATA SCALE0=1 SCALE_MIN=0.1 SCALE_MAX=3 DSCALE=0.01
146 : SIGMA0=0.01 SIGMA_MIN=0.00001 SIGMA_MAX=3 DSIGMA=0.01
147 : SIGMA_MEAN0=0.001
148 : LABEL=spe
149 : ... METAINFERENCE
150 : \endplumedfile
151 :
152 : (See also \ref RDC, \ref PBMETAD).
153 :
154 : */
155 : //+ENDPLUMEDOC
156 :
157 : class Metainference : public bias::Bias
158 : {
159 : // experimental values
160 : std::vector<double> parameters;
161 : // noise type
162 : unsigned noise_type_;
163 : enum { GAUSS, MGAUSS, OUTLIERS, MOUTLIERS, GENERIC };
164 : unsigned gen_likelihood_;
165 : enum { LIKE_GAUSS, LIKE_LOGN };
166 : // scale is data scaling factor
167 : // noise type
168 : unsigned scale_prior_;
169 : enum { SC_GAUSS, SC_FLAT };
170 : bool doscale_;
171 : double scale_;
172 : double scale_mu_;
173 : double scale_min_;
174 : double scale_max_;
175 : double Dscale_;
176 : // scale is data scaling factor
177 : // noise type
178 : unsigned offset_prior_;
179 : bool dooffset_;
180 : double offset_;
181 : double offset_mu_;
182 : double offset_min_;
183 : double offset_max_;
184 : double Doffset_;
185 : // scale and offset regression
186 : bool doregres_zero_;
187 : int nregres_zero_;
188 : // sigma is data uncertainty
189 : std::vector<double> sigma_;
190 : std::vector<double> sigma_min_;
191 : std::vector<double> sigma_max_;
192 : std::vector<double> Dsigma_;
193 : // sigma_mean is uncertainty in the mean estimate
194 : std::vector<double> sigma_mean2_;
195 : // this is the estimator of the mean value per replica for generic metainference
196 : std::vector<double> ftilde_;
197 : double Dftilde_;
198 :
199 : // temperature in kbt
200 : double kbt_;
201 :
202 : // Monte Carlo stuff
203 : std::vector<Random> random;
204 : unsigned MCsteps_;
205 : long long unsigned MCaccept_;
206 : long long unsigned MCacceptScale_;
207 : long long unsigned MCacceptFT_;
208 : long long unsigned MCtrial_;
209 : unsigned MCchunksize_;
210 :
211 : // output
212 : Value* valueScale;
213 : Value* valueOffset;
214 : Value* valueAccept;
215 : Value* valueAcceptScale;
216 : Value* valueAcceptFT;
217 : std::vector<Value*> valueSigma;
218 : std::vector<Value*> valueSigmaMean;
219 : std::vector<Value*> valueFtilde;
220 :
221 : // restart
222 : unsigned write_stride_;
223 : OFile sfile_;
224 :
225 : // others
226 : bool firstTime;
227 : std::vector<bool> firstTimeW;
228 : bool master;
229 : bool do_reweight_;
230 : unsigned do_optsigmamean_;
231 : unsigned nrep_;
232 : unsigned replica_;
233 : unsigned narg;
234 :
235 : // selector
236 : std::string selector_;
237 :
238 : // optimize sigma mean
239 : std::vector< std::vector < std::vector <double> > > sigma_mean2_last_;
240 : unsigned optsigmamean_stride_;
241 : // optimize sigma max
242 : unsigned N_optimized_step_;
243 : unsigned optimized_step_;
244 : bool sigmamax_opt_done_;
245 : std::vector<double> sigma_max_est_;
246 :
247 : // average weights
248 : unsigned average_weights_stride_;
249 : std::vector< std::vector <double> > average_weights_;
250 :
251 : double getEnergyMIGEN(const std::vector<double> &mean, const std::vector<double> &ftilde, const std::vector<double> &sigma,
252 : const double scale, const double offset);
253 : double getEnergySP(const std::vector<double> &mean, const std::vector<double> &sigma,
254 : const double scale, const double offset);
255 : double getEnergySPE(const std::vector<double> &mean, const std::vector<double> &sigma,
256 : const double scale, const double offset);
257 : double getEnergyGJ(const std::vector<double> &mean, const std::vector<double> &sigma,
258 : const double scale, const double offset);
259 : double getEnergyGJE(const std::vector<double> &mean, const std::vector<double> &sigma,
260 : const double scale, const double offset);
261 : void moveTilde(const std::vector<double> &mean_, double &old_energy);
262 : void moveScaleOffset(const std::vector<double> &mean_, double &old_energy);
263 : void moveSigmas(const std::vector<double> &mean_, double &old_energy, const unsigned i, const std::vector<unsigned> &indices, bool &breaknow);
264 : double doMonteCarlo(const std::vector<double> &mean);
265 : void getEnergyForceMIGEN(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
266 : void getEnergyForceSP(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
267 : void getEnergyForceSPE(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
268 : void getEnergyForceGJ(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
269 : void getEnergyForceGJE(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b);
270 : void get_weights(const unsigned iselect, double &weight, double &norm, double &neff);
271 : void replica_averaging(const double weight, const double norm, std::vector<double> &mean, std::vector<double> &dmean_b);
272 : void get_sigma_mean(const unsigned iselect, const double weight, const double norm, const double neff, const std::vector<double> &mean);
273 : void writeStatus();
274 : void do_regression_zero(const std::vector<double> &mean);
275 :
276 : public:
277 : explicit Metainference(const ActionOptions&);
278 : ~Metainference();
279 : void calculate() override;
280 : void update() override;
281 : static void registerKeywords(Keywords& keys);
282 : };
283 :
284 :
285 : PLUMED_REGISTER_ACTION(Metainference,"METAINFERENCE")
286 :
287 21 : void Metainference::registerKeywords(Keywords& keys) {
288 21 : Bias::registerKeywords(keys);
289 42 : keys.addInputKeyword("optional","PARARG","scalar","reference values for the experimental data, these can be provided as arguments without derivatives");
290 42 : keys.add("optional","PARAMETERS","reference values for the experimental data");
291 42 : keys.addFlag("NOENSEMBLE",false,"don't perform any replica-averaging");
292 42 : keys.addFlag("REWEIGHT",false,"simple REWEIGHT using the latest ARG as energy");
293 42 : keys.add("optional","AVERAGING", "Stride for calculation of averaged weights and sigma_mean");
294 42 : keys.add("compulsory","NOISETYPE","MGAUSS","functional form of the noise (GAUSS,MGAUSS,OUTLIERS,MOUTLIERS,GENERIC)");
295 42 : keys.add("compulsory","LIKELIHOOD","GAUSS","the likelihood for the GENERIC metainference model, GAUSS or LOGN");
296 42 : keys.add("compulsory","DFTILDE","0.1","fraction of sigma_mean used to evolve ftilde");
297 42 : keys.addFlag("SCALEDATA",false,"Set to TRUE if you want to sample a scaling factor common to all values and replicas");
298 42 : keys.add("compulsory","SCALE0","1.0","initial value of the scaling factor");
299 42 : keys.add("compulsory","SCALE_PRIOR","FLAT","either FLAT or GAUSSIAN");
300 42 : keys.add("optional","SCALE_MIN","minimum value of the scaling factor");
301 42 : keys.add("optional","SCALE_MAX","maximum value of the scaling factor");
302 42 : keys.add("optional","DSCALE","maximum MC move of the scaling factor");
303 42 : keys.addFlag("ADDOFFSET",false,"Set to TRUE if you want to sample an offset common to all values and replicas");
304 42 : keys.add("compulsory","OFFSET0","0.0","initial value of the offset");
305 42 : keys.add("compulsory","OFFSET_PRIOR","FLAT","either FLAT or GAUSSIAN");
306 42 : keys.add("optional","OFFSET_MIN","minimum value of the offset");
307 42 : keys.add("optional","OFFSET_MAX","maximum value of the offset");
308 42 : keys.add("optional","DOFFSET","maximum MC move of the offset");
309 42 : keys.add("optional","REGRES_ZERO","stride for regression with zero offset");
310 42 : keys.add("compulsory","SIGMA0","1.0","initial value of the uncertainty parameter");
311 42 : keys.add("compulsory","SIGMA_MIN","0.0","minimum value of the uncertainty parameter");
312 42 : keys.add("compulsory","SIGMA_MAX","10.","maximum value of the uncertainty parameter");
313 42 : keys.add("optional","DSIGMA","maximum MC move of the uncertainty parameter");
314 42 : keys.add("compulsory","OPTSIGMAMEAN","NONE","Set to NONE/SEM to manually set sigma mean, or to estimate it on the fly");
315 42 : keys.add("optional","SIGMA_MEAN0","starting value for the uncertainty in the mean estimate");
316 42 : keys.add("optional","SIGMA_MAX_STEPS", "Number of steps used to optimise SIGMA_MAX, before that the SIGMA_MAX value is used");
317 42 : keys.add("optional","TEMP","the system temperature - this is only needed if code doesn't pass the temperature to plumed");
318 42 : keys.add("optional","MC_STEPS","number of MC steps");
319 42 : keys.add("optional","MC_CHUNKSIZE","MC chunksize");
320 42 : keys.add("optional","STATUS_FILE","write a file with all the data useful for restart/continuation of Metainference");
321 42 : keys.add("compulsory","WRITE_STRIDE","10000","write the status to a file every N steps, this can be used for restart/continuation");
322 42 : keys.add("optional","SELECTOR","name of selector");
323 42 : keys.add("optional","NSELECT","range of values for selector [0, N-1]");
324 21 : keys.use("RESTART");
325 42 : keys.addOutputComponent("sigma", "default", "scalar","uncertainty parameter");
326 42 : keys.addOutputComponent("sigmaMean", "default", "scalar","uncertainty in the mean estimate");
327 42 : keys.addOutputComponent("neff", "default", "scalar","effective number of replicas");
328 42 : keys.addOutputComponent("acceptSigma", "default", "scalar","MC acceptance for sigma values");
329 42 : keys.addOutputComponent("acceptScale", "SCALEDATA", "scalar","MC acceptance for scale value");
330 42 : keys.addOutputComponent("acceptFT", "GENERIC", "scalar","MC acceptance for general metainference f tilde value");
331 42 : keys.addOutputComponent("weight", "REWEIGHT", "scalar","weights of the weighted average");
332 42 : keys.addOutputComponent("biasDer", "REWEIGHT", "scalar","derivatives with respect to the bias");
333 42 : keys.addOutputComponent("scale", "SCALEDATA", "scalar","scale parameter");
334 42 : keys.addOutputComponent("offset", "ADDOFFSET", "scalar","offset parameter");
335 42 : keys.addOutputComponent("ftilde", "GENERIC", "scalar","ensemble average estimator");
336 21 : }
337 :
338 19 : Metainference::Metainference(const ActionOptions&ao):
339 : PLUMED_BIAS_INIT(ao),
340 19 : doscale_(false),
341 19 : scale_(1.),
342 19 : scale_mu_(0),
343 19 : scale_min_(1),
344 19 : scale_max_(-1),
345 19 : Dscale_(-1),
346 19 : dooffset_(false),
347 19 : offset_(0.),
348 19 : offset_mu_(0),
349 19 : offset_min_(1),
350 19 : offset_max_(-1),
351 19 : Doffset_(-1),
352 19 : doregres_zero_(false),
353 19 : nregres_zero_(0),
354 19 : Dftilde_(0.1),
355 19 : random(3),
356 19 : MCsteps_(1),
357 19 : MCaccept_(0),
358 19 : MCacceptScale_(0),
359 19 : MCacceptFT_(0),
360 19 : MCtrial_(0),
361 19 : MCchunksize_(0),
362 19 : write_stride_(0),
363 19 : firstTime(true),
364 19 : do_reweight_(false),
365 19 : do_optsigmamean_(0),
366 19 : optsigmamean_stride_(0),
367 19 : N_optimized_step_(0),
368 19 : optimized_step_(0),
369 19 : sigmamax_opt_done_(false),
370 19 : average_weights_stride_(1)
371 : {
372 19 : bool noensemble = false;
373 19 : parseFlag("NOENSEMBLE", noensemble);
374 :
375 : // set up replica stuff
376 19 : master = (comm.Get_rank()==0);
377 19 : if(master) {
378 11 : nrep_ = multi_sim_comm.Get_size();
379 11 : replica_ = multi_sim_comm.Get_rank();
380 11 : if(noensemble) nrep_ = 1;
381 : } else {
382 8 : nrep_ = 0;
383 8 : replica_ = 0;
384 : }
385 19 : comm.Sum(&nrep_,1);
386 19 : comm.Sum(&replica_,1);
387 :
388 19 : unsigned nsel = 1;
389 19 : parse("SELECTOR", selector_);
390 38 : parse("NSELECT", nsel);
391 : // do checks
392 19 : if(selector_.length()>0 && nsel<=1) error("With SELECTOR active, NSELECT must be greater than 1");
393 19 : if(selector_.length()==0 && nsel>1) error("With NSELECT greater than 1, you must specify SELECTOR");
394 :
395 : // initialise firstTimeW
396 19 : firstTimeW.resize(nsel, true);
397 :
398 : // reweight implies a different number of arguments (the latest one must always be the bias)
399 19 : parseFlag("REWEIGHT", do_reweight_);
400 19 : if(do_reweight_&&nrep_<2) error("REWEIGHT can only be used in parallel with 2 or more replicas");
401 34 : if(!getRestart()) average_weights_.resize(nsel, std::vector<double> (nrep_, 1./static_cast<double>(nrep_)));
402 8 : else average_weights_.resize(nsel, std::vector<double> (nrep_, 0.));
403 19 : narg = getNumberOfArguments();
404 19 : if(do_reweight_) narg--;
405 :
406 19 : unsigned averaging=0;
407 19 : parse("AVERAGING", averaging);
408 19 : if(averaging>0) {
409 0 : average_weights_stride_ = averaging;
410 0 : optsigmamean_stride_ = averaging;
411 : }
412 :
413 38 : parseVector("PARAMETERS",parameters);
414 19 : if(parameters.size()!=static_cast<unsigned>(narg)&&!parameters.empty())
415 0 : error("Size of PARAMETERS array should be either 0 or the same as of the number of arguments in ARG1");
416 :
417 : std::vector<Value*> arg2;
418 38 : parseArgumentList("PARARG",arg2);
419 19 : if(!arg2.empty()) {
420 4 : if(parameters.size()>0) error("It is not possible to use PARARG and PARAMETERS together");
421 4 : if(arg2.size()!=narg) error("Size of PARARG array should be the same as number for arguments in ARG");
422 2360 : for(unsigned i=0; i<arg2.size(); i++) {
423 2356 : parameters.push_back(arg2[i]->get());
424 2356 : if(arg2[i]->hasDerivatives()==true) error("PARARG can only accept arguments without derivatives");
425 : }
426 : }
427 :
428 19 : if(parameters.size()!=narg)
429 0 : error("PARARG or PARAMETERS arrays should include the same number of elements as the arguments in ARG");
430 :
431 : std::string stringa_noise;
432 38 : parse("NOISETYPE",stringa_noise);
433 19 : if(stringa_noise=="GAUSS") noise_type_ = GAUSS;
434 18 : else if(stringa_noise=="MGAUSS") noise_type_ = MGAUSS;
435 10 : else if(stringa_noise=="OUTLIERS") noise_type_ = OUTLIERS;
436 5 : else if(stringa_noise=="MOUTLIERS") noise_type_ = MOUTLIERS;
437 1 : else if(stringa_noise=="GENERIC") noise_type_ = GENERIC;
438 0 : else error("Unknown noise type!");
439 :
440 19 : if(noise_type_== GENERIC) {
441 : std::string stringa_like;
442 2 : parse("LIKELIHOOD",stringa_like);
443 1 : if(stringa_like=="GAUSS") gen_likelihood_ = LIKE_GAUSS;
444 0 : else if(stringa_like=="LOGN") gen_likelihood_ = LIKE_LOGN;
445 0 : else error("Unknown likelihood type!");
446 :
447 2 : parse("DFTILDE",Dftilde_);
448 : }
449 :
450 38 : parse("WRITE_STRIDE",write_stride_);
451 : std::string status_file_name_;
452 38 : parse("STATUS_FILE",status_file_name_);
453 38 : if(status_file_name_=="") status_file_name_ = "MISTATUS"+getLabel();
454 0 : else status_file_name_ = status_file_name_+getLabel();
455 :
456 : std::string stringa_optsigma;
457 38 : parse("OPTSIGMAMEAN", stringa_optsigma);
458 19 : if(stringa_optsigma=="NONE") do_optsigmamean_=0;
459 0 : else if(stringa_optsigma=="SEM") do_optsigmamean_=1;
460 0 : else if(stringa_optsigma=="SEM_MAX") do_optsigmamean_=2;
461 :
462 19 : unsigned aver_max_steps=0;
463 19 : parse("SIGMA_MAX_STEPS", aver_max_steps);
464 19 : if(aver_max_steps==0&&do_optsigmamean_==2) aver_max_steps=averaging*2000;
465 19 : if(aver_max_steps>0&&do_optsigmamean_<2) error("SIGMA_MAX_STEPS can only be used together with OPTSIGMAMEAN=SEM_MAX");
466 19 : if(aver_max_steps>0&&do_optsigmamean_==2) N_optimized_step_=aver_max_steps;
467 19 : if(aver_max_steps>0&&aver_max_steps<averaging) error("SIGMA_MAX_STEPS must be greater than AVERAGING");
468 :
469 : // resize std::vector for sigma_mean history
470 19 : sigma_mean2_last_.resize(nsel);
471 38 : for(unsigned i=0; i<nsel; i++) sigma_mean2_last_[i].resize(narg);
472 :
473 : std::vector<double> read_sigma_mean_;
474 19 : parseVector("SIGMA_MEAN0",read_sigma_mean_);
475 19 : if(do_optsigmamean_==0 && read_sigma_mean_.size()==0 && !getRestart())
476 0 : error("If you don't use OPTSIGMAMEAN and you are not RESTARTING then you MUST SET SIGMA_MEAN0");
477 :
478 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
479 13 : if(read_sigma_mean_.size()==narg) {
480 0 : sigma_mean2_.resize(narg);
481 0 : for(unsigned i=0; i<narg; i++) sigma_mean2_[i]=read_sigma_mean_[i]*read_sigma_mean_[i];
482 13 : } else if(read_sigma_mean_.size()==1) {
483 13 : sigma_mean2_.resize(narg,read_sigma_mean_[0]*read_sigma_mean_[0]);
484 0 : } else if(read_sigma_mean_.size()==0) {
485 0 : sigma_mean2_.resize(narg,0.000001);
486 : } else {
487 0 : error("SIGMA_MEAN0 can accept either one single value or as many values as the arguments (with NOISETYPE=MGAUSS|MOUTLIERS)");
488 : }
489 : // set the initial value for the history
490 2416 : for(unsigned i=0; i<nsel; i++) for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j].push_back(sigma_mean2_[j]);
491 : } else {
492 6 : if(read_sigma_mean_.size()==1) {
493 6 : sigma_mean2_.resize(1, read_sigma_mean_[0]*read_sigma_mean_[0]);
494 0 : } else if(read_sigma_mean_.size()==0) {
495 0 : sigma_mean2_.resize(1, 0.000001);
496 : } else {
497 0 : error("If you want to use more than one SIGMA_MEAN0 you should use NOISETYPE=MGAUSS|MOUTLIERS");
498 : }
499 : // set the initial value for the history
500 37 : for(unsigned i=0; i<nsel; i++) for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j].push_back(sigma_mean2_[0]);
501 : }
502 :
503 19 : parseFlag("SCALEDATA", doscale_);
504 19 : if(doscale_) {
505 : std::string stringa_noise;
506 24 : parse("SCALE_PRIOR",stringa_noise);
507 12 : if(stringa_noise=="GAUSSIAN") scale_prior_ = SC_GAUSS;
508 12 : else if(stringa_noise=="FLAT") scale_prior_ = SC_FLAT;
509 0 : else error("Unknown SCALE_PRIOR type!");
510 12 : parse("SCALE0",scale_);
511 12 : parse("DSCALE",Dscale_);
512 12 : if(Dscale_<0.) error("DSCALE must be set when using SCALEDATA");
513 12 : if(scale_prior_==SC_GAUSS) {
514 0 : scale_mu_=scale_;
515 : } else {
516 12 : parse("SCALE_MIN",scale_min_);
517 12 : parse("SCALE_MAX",scale_max_);
518 12 : if(scale_max_<scale_min_) error("SCALE_MAX and SCALE_MIN must be set when using SCALE_PRIOR=FLAT");
519 : }
520 : }
521 :
522 19 : parseFlag("ADDOFFSET", dooffset_);
523 19 : if(dooffset_) {
524 : std::string stringa_noise;
525 4 : parse("OFFSET_PRIOR",stringa_noise);
526 2 : if(stringa_noise=="GAUSSIAN") offset_prior_ = SC_GAUSS;
527 2 : else if(stringa_noise=="FLAT") offset_prior_ = SC_FLAT;
528 0 : else error("Unknown OFFSET_PRIOR type!");
529 2 : parse("OFFSET0",offset_);
530 2 : parse("DOFFSET",Doffset_);
531 2 : if(offset_prior_==SC_GAUSS) {
532 0 : offset_mu_=offset_;
533 0 : if(Doffset_<0.) error("DOFFSET must be set when using OFFSET_PRIOR=GAUSS");
534 : } else {
535 2 : parse("OFFSET_MIN",offset_min_);
536 2 : parse("OFFSET_MAX",offset_max_);
537 2 : if(Doffset_<0) Doffset_ = 0.05*(offset_max_ - offset_min_);
538 2 : if(offset_max_<offset_min_) error("OFFSET_MAX and OFFSET_MIN must be set when using OFFSET_PRIOR=FLAT");
539 : }
540 : }
541 :
542 : // regression with zero intercept
543 19 : parse("REGRES_ZERO", nregres_zero_);
544 19 : if(nregres_zero_>0) {
545 : // set flag
546 0 : doregres_zero_=true;
547 : // check if already sampling scale and offset
548 0 : if(doscale_) error("REGRES_ZERO and SCALEDATA are mutually exclusive");
549 0 : if(dooffset_) error("REGRES_ZERO and ADDOFFSET are mutually exclusive");
550 : }
551 :
552 : std::vector<double> readsigma;
553 19 : parseVector("SIGMA0",readsigma);
554 19 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma.size()>1)
555 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
556 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
557 13 : sigma_.resize(readsigma.size());
558 13 : sigma_=readsigma;
559 6 : } else sigma_.resize(1, readsigma[0]);
560 :
561 : std::vector<double> readsigma_min;
562 19 : parseVector("SIGMA_MIN",readsigma_min);
563 19 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_min.size()>1)
564 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
565 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
566 13 : sigma_min_.resize(readsigma_min.size());
567 13 : sigma_min_=readsigma_min;
568 6 : } else sigma_min_.resize(1, readsigma_min[0]);
569 :
570 : std::vector<double> readsigma_max;
571 19 : parseVector("SIGMA_MAX",readsigma_max);
572 19 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_max.size()>1)
573 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
574 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
575 13 : sigma_max_.resize(readsigma_max.size());
576 13 : sigma_max_=readsigma_max;
577 6 : } else sigma_max_.resize(1, readsigma_max[0]);
578 :
579 19 : if(sigma_max_.size()!=sigma_min_.size()) error("The number of values for SIGMA_MIN and SIGMA_MAX must be the same");
580 :
581 : std::vector<double> read_dsigma;
582 19 : parseVector("DSIGMA",read_dsigma);
583 19 : if((noise_type_!=MGAUSS&&noise_type_!=MOUTLIERS&&noise_type_!=GENERIC)&&readsigma_max.size()>1)
584 0 : error("If you want to use more than one SIGMA you should use NOISETYPE=MGAUSS|MOUTLIERS|GENERIC");
585 19 : if(read_dsigma.size()>0) {
586 19 : Dsigma_.resize(read_dsigma.size());
587 19 : Dsigma_=read_dsigma;
588 : } else {
589 0 : Dsigma_.resize(sigma_max_.size(), -1.);
590 : /* in this case Dsigma is initialised after reading the restart file if present */
591 : }
592 :
593 : // monte carlo stuff
594 19 : parse("MC_STEPS",MCsteps_);
595 19 : parse("MC_CHUNKSIZE", MCchunksize_);
596 : // get temperature
597 19 : kbt_ = getkBT();
598 19 : if(kbt_==0.0) error("Unless the MD engine passes the temperature to plumed, you must specify it using TEMP");
599 :
600 19 : checkRead();
601 :
602 : // set sigma_bias
603 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
604 13 : if(sigma_.size()==1) {
605 13 : double tmp = sigma_[0];
606 13 : sigma_.resize(narg, tmp);
607 0 : } else if(sigma_.size()>1&&sigma_.size()!=narg) {
608 0 : error("SIGMA0 can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
609 : }
610 13 : if(sigma_min_.size()==1) {
611 13 : double tmp = sigma_min_[0];
612 13 : sigma_min_.resize(narg, tmp);
613 0 : } else if(sigma_min_.size()>1&&sigma_min_.size()!=narg) {
614 0 : error("SIGMA_MIN can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
615 : }
616 13 : if(sigma_max_.size()==1) {
617 13 : double tmp = sigma_max_[0];
618 13 : sigma_max_.resize(narg, tmp);
619 0 : } else if(sigma_max_.size()>1&&sigma_max_.size()!=narg) {
620 0 : error("SIGMA_MAX can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
621 : }
622 13 : if(Dsigma_.size()==1) {
623 13 : double tmp = Dsigma_[0];
624 13 : Dsigma_.resize(narg, tmp);
625 0 : } else if(Dsigma_.size()>1&&Dsigma_.size()!=narg) {
626 0 : error("DSIGMA can accept either one single value or as many values as the number of arguments (with NOISETYPE=MGAUSS|MOUTLIERS|GENERIC)");
627 : }
628 : }
629 :
630 19 : sigma_max_est_.resize(sigma_max_.size(), 0.);
631 :
632 19 : IFile restart_sfile;
633 19 : restart_sfile.link(*this);
634 19 : if(getRestart()&&restart_sfile.FileExist(status_file_name_)) {
635 4 : firstTime = false;
636 8 : for(unsigned i=0; i<nsel; i++) firstTimeW[i] = false;
637 4 : restart_sfile.open(status_file_name_);
638 4 : log.printf(" Restarting from %s\n", status_file_name_.c_str());
639 : double dummy;
640 8 : if(restart_sfile.scanField("time",dummy)) {
641 : // check for syncronisation
642 4 : std::vector<double> dummy_time(nrep_,0);
643 4 : if(master&&nrep_>1) {
644 2 : dummy_time[replica_] = dummy;
645 2 : multi_sim_comm.Sum(dummy_time);
646 : }
647 4 : comm.Sum(dummy_time);
648 8 : for(unsigned i=1; i<nrep_; i++) {
649 4 : std::string msg = "METAINFERENCE restart files " + status_file_name_ + " are not in sync";
650 4 : if(dummy_time[i]!=dummy_time[0]) plumed_merror(msg);
651 : }
652 : // nsel
653 8 : for(unsigned i=0; i<sigma_mean2_last_.size(); i++) {
654 : std::string msg_i;
655 4 : Tools::convert(i,msg_i);
656 : // narg
657 4 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
658 20 : for(unsigned j=0; j<narg; ++j) {
659 : std::string msg_j;
660 16 : Tools::convert(j,msg_j);
661 16 : std::string msg = msg_i+"_"+msg_j;
662 : double read_sm;
663 16 : restart_sfile.scanField("sigmaMean_"+msg,read_sm);
664 16 : sigma_mean2_last_[i][j][0] = read_sm*read_sm;
665 : }
666 : }
667 4 : if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
668 : double read_sm;
669 : std::string msg_j;
670 0 : Tools::convert(0,msg_j);
671 0 : std::string msg = msg_i+"_"+msg_j;
672 0 : restart_sfile.scanField("sigmaMean_"+msg,read_sm);
673 0 : for(unsigned j=0; j<narg; j++) sigma_mean2_last_[i][j][0] = read_sm*read_sm;
674 : }
675 : }
676 :
677 20 : for(unsigned i=0; i<sigma_.size(); ++i) {
678 : std::string msg;
679 16 : Tools::convert(i,msg);
680 32 : restart_sfile.scanField("sigma_"+msg,sigma_[i]);
681 : }
682 20 : for(unsigned i=0; i<sigma_max_.size(); ++i) {
683 : std::string msg;
684 16 : Tools::convert(i,msg);
685 16 : restart_sfile.scanField("sigma_max_"+msg,sigma_max_[i]);
686 16 : sigmamax_opt_done_=true;
687 : }
688 4 : if(noise_type_==GENERIC) {
689 0 : for(unsigned i=0; i<ftilde_.size(); ++i) {
690 : std::string msg;
691 0 : Tools::convert(i,msg);
692 0 : restart_sfile.scanField("ftilde_"+msg,ftilde_[i]);
693 : }
694 : }
695 4 : restart_sfile.scanField("scale0_",scale_);
696 4 : restart_sfile.scanField("offset0_",offset_);
697 :
698 8 : for(unsigned i=0; i<nsel; i++) {
699 : std::string msg;
700 4 : Tools::convert(i,msg);
701 : double tmp_w;
702 4 : restart_sfile.scanField("weight_"+msg,tmp_w);
703 4 : if(master) {
704 2 : average_weights_[i][replica_] = tmp_w;
705 2 : if(nrep_>1) multi_sim_comm.Sum(&average_weights_[i][0], nrep_);
706 : }
707 4 : comm.Sum(&average_weights_[i][0], nrep_);
708 : }
709 :
710 : }
711 4 : restart_sfile.scanField();
712 4 : restart_sfile.close();
713 : }
714 :
715 : /* If DSIGMA is not yet initialised do it now */
716 2415 : for(unsigned i=0; i<sigma_max_.size(); i++) if(Dsigma_[i]==-1) Dsigma_[i] = 0.05*(sigma_max_[i] - sigma_min_[i]);
717 :
718 19 : switch(noise_type_) {
719 1 : case GENERIC:
720 1 : log.printf(" with general metainference ");
721 1 : if(gen_likelihood_==LIKE_GAUSS) log.printf(" and a gaussian likelihood\n");
722 0 : else if(gen_likelihood_==LIKE_LOGN) log.printf(" and a log-normal likelihood\n");
723 1 : log.printf(" ensemble average parameter sampled with a step %lf of sigma_mean\n", Dftilde_);
724 : break;
725 1 : case GAUSS:
726 1 : log.printf(" with gaussian noise and a single noise parameter for all the data\n");
727 : break;
728 8 : case MGAUSS:
729 8 : log.printf(" with gaussian noise and a noise parameter for each data point\n");
730 : break;
731 5 : case OUTLIERS:
732 5 : log.printf(" with long tailed gaussian noise and a single noise parameter for all the data\n");
733 : break;
734 4 : case MOUTLIERS:
735 4 : log.printf(" with long tailed gaussian noise and a noise parameter for each data point\n");
736 : break;
737 : }
738 :
739 19 : if(doscale_) {
740 : // check that the scale value is the same for all replicas
741 12 : std::vector<double> dummy_scale(nrep_,0);
742 12 : if(master&&nrep_>1) {
743 6 : dummy_scale[replica_] = scale_;
744 6 : multi_sim_comm.Sum(dummy_scale);
745 : }
746 12 : comm.Sum(dummy_scale);
747 24 : for(unsigned i=1; i<nrep_; i++) {
748 12 : std::string msg = "The SCALE value must be the same for all replicas: check your input or restart file";
749 12 : if(dummy_scale[i]!=dummy_scale[0]) plumed_merror(msg);
750 : }
751 12 : log.printf(" sampling a common scaling factor with:\n");
752 12 : log.printf(" initial scale parameter %f\n",scale_);
753 12 : if(scale_prior_==SC_GAUSS) {
754 0 : log.printf(" gaussian prior with mean %f and width %f\n",scale_mu_,Dscale_);
755 : }
756 12 : if(scale_prior_==SC_FLAT) {
757 12 : log.printf(" flat prior between %f - %f\n",scale_min_,scale_max_);
758 12 : log.printf(" maximum MC move of scale parameter %f\n",Dscale_);
759 : }
760 : }
761 :
762 19 : if(dooffset_) {
763 : // check that the offset value is the same for all replicas
764 2 : std::vector<double> dummy_offset(nrep_,0);
765 2 : if(master&&nrep_>1) {
766 0 : dummy_offset[replica_] = offset_;
767 0 : multi_sim_comm.Sum(dummy_offset);
768 : }
769 2 : comm.Sum(dummy_offset);
770 2 : for(unsigned i=1; i<nrep_; i++) {
771 0 : std::string msg = "The OFFSET value must be the same for all replicas: check your input or restart file";
772 0 : if(dummy_offset[i]!=dummy_offset[0]) plumed_merror(msg);
773 : }
774 2 : log.printf(" sampling a common offset with:\n");
775 2 : log.printf(" initial offset parameter %f\n",offset_);
776 2 : if(offset_prior_==SC_GAUSS) {
777 0 : log.printf(" gaussian prior with mean %f and width %f\n",offset_mu_,Doffset_);
778 : }
779 2 : if(offset_prior_==SC_FLAT) {
780 2 : log.printf(" flat prior between %f - %f\n",offset_min_,offset_max_);
781 2 : log.printf(" maximum MC move of offset parameter %f\n",Doffset_);
782 : }
783 : }
784 :
785 19 : if(doregres_zero_)
786 0 : log.printf(" doing regression with zero intercept with stride: %d\n", nregres_zero_);
787 :
788 19 : log.printf(" number of experimental data points %u\n",narg);
789 19 : log.printf(" number of replicas %u\n",nrep_);
790 19 : log.printf(" initial data uncertainties");
791 2415 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f", sigma_[i]);
792 19 : log.printf("\n");
793 19 : log.printf(" minimum data uncertainties");
794 2415 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",sigma_min_[i]);
795 19 : log.printf("\n");
796 19 : log.printf(" maximum data uncertainties");
797 2415 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",sigma_max_[i]);
798 19 : log.printf("\n");
799 19 : log.printf(" maximum MC move of data uncertainties");
800 2415 : for(unsigned i=0; i<sigma_.size(); ++i) log.printf(" %f",Dsigma_[i]);
801 19 : log.printf("\n");
802 19 : log.printf(" temperature of the system %f\n",kbt_);
803 19 : log.printf(" MC steps %u\n",MCsteps_);
804 19 : log.printf(" initial standard errors of the mean");
805 2415 : for(unsigned i=0; i<sigma_mean2_.size(); ++i) log.printf(" %f", std::sqrt(sigma_mean2_[i]));
806 19 : log.printf("\n");
807 :
808 19 : if(do_reweight_) {
809 32 : addComponent("biasDer");
810 32 : componentIsNotPeriodic("biasDer");
811 32 : addComponent("weight");
812 32 : componentIsNotPeriodic("weight");
813 : }
814 :
815 38 : addComponent("neff");
816 19 : componentIsNotPeriodic("neff");
817 :
818 19 : if(doscale_ || doregres_zero_) {
819 24 : addComponent("scale");
820 12 : componentIsNotPeriodic("scale");
821 12 : valueScale=getPntrToComponent("scale");
822 : }
823 :
824 19 : if(dooffset_) {
825 4 : addComponent("offset");
826 2 : componentIsNotPeriodic("offset");
827 2 : valueOffset=getPntrToComponent("offset");
828 : }
829 :
830 19 : if(dooffset_||doscale_) {
831 28 : addComponent("acceptScale");
832 14 : componentIsNotPeriodic("acceptScale");
833 14 : valueAcceptScale=getPntrToComponent("acceptScale");
834 : }
835 :
836 19 : if(noise_type_==GENERIC) {
837 2 : addComponent("acceptFT");
838 1 : componentIsNotPeriodic("acceptFT");
839 1 : valueAcceptFT=getPntrToComponent("acceptFT");
840 : }
841 :
842 38 : addComponent("acceptSigma");
843 19 : componentIsNotPeriodic("acceptSigma");
844 19 : valueAccept=getPntrToComponent("acceptSigma");
845 :
846 19 : if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
847 2403 : for(unsigned i=0; i<sigma_mean2_.size(); ++i) {
848 2390 : std::string num; Tools::convert(i,num);
849 4780 : addComponent("sigmaMean-"+num); componentIsNotPeriodic("sigmaMean-"+num);
850 2390 : valueSigmaMean.push_back(getPntrToComponent("sigmaMean-"+num));
851 4780 : getPntrToComponent("sigmaMean-"+num)->set(std::sqrt(sigma_mean2_[i]));
852 4780 : addComponent("sigma-"+num); componentIsNotPeriodic("sigma-"+num);
853 2390 : valueSigma.push_back(getPntrToComponent("sigma-"+num));
854 2390 : getPntrToComponent("sigma-"+num)->set(sigma_[i]);
855 2390 : if(noise_type_==GENERIC) {
856 4 : addComponent("ftilde-"+num); componentIsNotPeriodic("ftilde-"+num);
857 2 : valueFtilde.push_back(getPntrToComponent("ftilde-"+num));
858 : }
859 : }
860 13 : } else {
861 12 : addComponent("sigmaMean"); componentIsNotPeriodic("sigmaMean");
862 6 : valueSigmaMean.push_back(getPntrToComponent("sigmaMean"));
863 12 : getPntrToComponent("sigmaMean")->set(std::sqrt(sigma_mean2_[0]));
864 12 : addComponent("sigma"); componentIsNotPeriodic("sigma");
865 6 : valueSigma.push_back(getPntrToComponent("sigma"));
866 12 : getPntrToComponent("sigma")->set(sigma_[0]);
867 : }
868 :
869 : // initialize random seed
870 : unsigned iseed;
871 19 : if(master) {
872 11 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
873 11 : iseed = static_cast<unsigned>(ts)+replica_;
874 : } else {
875 8 : iseed = 0;
876 : }
877 19 : comm.Sum(&iseed, 1);
878 : // this is used for ftilde and sigma both the move and the acceptance
879 : // this is different for each replica
880 19 : random[0].setSeed(-iseed);
881 19 : if(doscale_||dooffset_) {
882 : // in this case we want the same seed everywhere
883 14 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
884 14 : iseed = static_cast<unsigned>(ts);
885 14 : if(master&&nrep_>1) multi_sim_comm.Bcast(iseed,0);
886 14 : comm.Bcast(iseed,0);
887 : // this is used for scale and offset sampling and acceptance
888 14 : random[1].setSeed(-iseed);
889 : }
890 : // this is used for random chunk of sigmas, and it is different for each replica
891 19 : if(master) {
892 11 : auto ts = std::chrono::time_point_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now()).time_since_epoch().count();
893 11 : iseed = static_cast<unsigned>(ts)+replica_;
894 : } else {
895 8 : iseed = 0;
896 : }
897 19 : comm.Sum(&iseed, 1);
898 19 : random[2].setSeed(-iseed);
899 :
900 : // outfile stuff
901 19 : if(write_stride_>0) {
902 19 : sfile_.link(*this);
903 19 : sfile_.open(status_file_name_);
904 : }
905 :
906 38 : log<<" Bibliography "<<plumed.cite("Bonomi, Camilloni, Cavalli, Vendruscolo, Sci. Adv. 2, e150117 (2016)");
907 35 : if(do_reweight_) log<<plumed.cite("Bonomi, Camilloni, Vendruscolo, Sci. Rep. 6, 31232 (2016)");
908 19 : if(do_optsigmamean_>0) log<<plumed.cite("Loehr, Jussupow, Camilloni, J. Chem. Phys. 146, 165102 (2017)");
909 38 : log<<plumed.cite("Bonomi, Camilloni, Bioinformatics, 33, 3999 (2017)");
910 19 : log<<"\n";
911 38 : }
912 :
913 38 : Metainference::~Metainference()
914 : {
915 19 : if(sfile_.isOpen()) sfile_.close();
916 114 : }
917 :
918 156 : double Metainference::getEnergySP(const std::vector<double> &mean, const std::vector<double> &sigma,
919 : const double scale, const double offset)
920 : {
921 156 : const double scale2 = scale*scale;
922 156 : const double sm2 = sigma_mean2_[0];
923 156 : const double ss2 = sigma[0]*sigma[0] + scale2*sm2;
924 156 : const double sss = sigma[0]*sigma[0] + sm2;
925 :
926 : double ene = 0.0;
927 156 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
928 : {
929 : #pragma omp for reduction( + : ene)
930 : for(unsigned i=0; i<narg; ++i) {
931 : const double dev = scale*mean[i]-parameters[i]+offset;
932 : const double a2 = 0.5*dev*dev + ss2;
933 : if(sm2 > 0.0) {
934 : ene += std::log(2.0*a2/(1.0-std::exp(-a2/sm2)));
935 : }
936 : else {
937 : ene += std::log(2.0*a2);
938 : }
939 : }
940 : }
941 : // add one single Jeffrey's prior and one normalisation per data point
942 156 : ene += 0.5*std::log(sss) + static_cast<double>(narg)*0.5*std::log(0.5*M_PI*M_PI/ss2);
943 156 : if(doscale_ || doregres_zero_) ene += 0.5*std::log(sss);
944 156 : if(dooffset_) ene += 0.5*std::log(sss);
945 156 : return kbt_ * ene;
946 : }
947 :
948 144 : double Metainference::getEnergySPE(const std::vector<double> &mean, const std::vector<double> &sigma,
949 : const double scale, const double offset)
950 : {
951 144 : const double scale2 = scale*scale;
952 : double ene = 0.0;
953 144 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
954 : {
955 : #pragma omp for reduction( + : ene)
956 : for(unsigned i=0; i<narg; ++i) {
957 : const double sm2 = sigma_mean2_[i];
958 : const double ss2 = sigma[i]*sigma[i] + scale2*sm2;
959 : const double sss = sigma[i]*sigma[i] + sm2;
960 : const double dev = scale*mean[i]-parameters[i]+offset;
961 : const double a2 = 0.5*dev*dev + ss2;
962 : if(sm2 > 0.0) {
963 : ene += 0.5*std::log(sss) + 0.5*std::log(0.5*M_PI*M_PI/ss2) + std::log(2.0*a2/(1.0-std::exp(-a2/sm2)));
964 : }
965 : else {
966 : ene += 0.5*std::log(sss) + 0.5*std::log(0.5*M_PI*M_PI/ss2) + std::log(2.0*a2);
967 : }
968 : if(doscale_ || doregres_zero_) ene += 0.5*std::log(sss);
969 : if(dooffset_) ene += 0.5*std::log(sss);
970 : }
971 : }
972 144 : return kbt_ * ene;
973 : }
974 :
975 48 : double Metainference::getEnergyMIGEN(const std::vector<double> &mean, const std::vector<double> &ftilde, const std::vector<double> &sigma,
976 : const double scale, const double offset)
977 : {
978 : double ene = 0.0;
979 48 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
980 : {
981 : #pragma omp for reduction( + : ene)
982 : for(unsigned i=0; i<narg; ++i) {
983 : const double inv_sb2 = 1./(sigma[i]*sigma[i]);
984 : const double inv_sm2 = 1./sigma_mean2_[i];
985 : double devb = 0;
986 : if(gen_likelihood_==LIKE_GAUSS) devb = scale*ftilde[i]-parameters[i]+offset;
987 : else if(gen_likelihood_==LIKE_LOGN) devb = std::log(scale*ftilde[i]/parameters[i]);
988 : double devm = mean[i] - ftilde[i];
989 : // deviation + normalisation + jeffrey
990 : double normb = 0.;
991 : if(gen_likelihood_==LIKE_GAUSS) normb = -0.5*std::log(0.5/M_PI*inv_sb2);
992 : else if(gen_likelihood_==LIKE_LOGN) normb = -0.5*std::log(0.5/M_PI*inv_sb2/(parameters[i]*parameters[i]));
993 : const double normm = -0.5*std::log(0.5/M_PI*inv_sm2);
994 : const double jeffreys = -0.5*std::log(2.*inv_sb2);
995 : ene += 0.5*devb*devb*inv_sb2 + 0.5*devm*devm*inv_sm2 + normb + normm + jeffreys;
996 : if(doscale_ || doregres_zero_) ene += jeffreys;
997 : if(dooffset_) ene += jeffreys;
998 : }
999 : }
1000 48 : return kbt_ * ene;
1001 : }
1002 :
1003 36 : double Metainference::getEnergyGJ(const std::vector<double> &mean, const std::vector<double> &sigma,
1004 : const double scale, const double offset)
1005 : {
1006 36 : const double scale2 = scale*scale;
1007 36 : const double inv_s2 = 1./(sigma[0]*sigma[0] + scale2*sigma_mean2_[0]);
1008 36 : const double inv_sss = 1./(sigma[0]*sigma[0] + sigma_mean2_[0]);
1009 :
1010 : double ene = 0.0;
1011 36 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
1012 : {
1013 : #pragma omp for reduction( + : ene)
1014 : for(unsigned i=0; i<narg; ++i) {
1015 : double dev = scale*mean[i]-parameters[i]+offset;
1016 : ene += 0.5*dev*dev*inv_s2;
1017 : }
1018 : }
1019 36 : const double normalisation = -0.5*std::log(0.5/M_PI*inv_s2);
1020 36 : const double jeffreys = -0.5*std::log(2.*inv_sss);
1021 : // add Jeffrey's prior in case one sigma for all data points + one normalisation per datapoint
1022 36 : ene += jeffreys + static_cast<double>(narg)*normalisation;
1023 36 : if(doscale_ || doregres_zero_) ene += jeffreys;
1024 36 : if(dooffset_) ene += jeffreys;
1025 :
1026 36 : return kbt_ * ene;
1027 : }
1028 :
1029 152 : double Metainference::getEnergyGJE(const std::vector<double> &mean, const std::vector<double> &sigma,
1030 : const double scale, const double offset)
1031 : {
1032 152 : const double scale2 = scale*scale;
1033 :
1034 : double ene = 0.0;
1035 152 : #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(ene)
1036 : {
1037 : #pragma omp for reduction( + : ene)
1038 : for(unsigned i=0; i<narg; ++i) {
1039 : const double inv_s2 = 1./(sigma[i]*sigma[i] + scale2*sigma_mean2_[i]);
1040 : const double inv_sss = 1./(sigma[i]*sigma[i] + sigma_mean2_[i]);
1041 : double dev = scale*mean[i]-parameters[i]+offset;
1042 : // deviation + normalisation + jeffrey
1043 : const double normalisation = -0.5*std::log(0.5/M_PI*inv_s2);
1044 : const double jeffreys = -0.5*std::log(2.*inv_sss);
1045 : ene += 0.5*dev*dev*inv_s2 + normalisation + jeffreys;
1046 : if(doscale_ || doregres_zero_) ene += jeffreys;
1047 : if(dooffset_) ene += jeffreys;
1048 : }
1049 : }
1050 152 : return kbt_ * ene;
1051 : }
1052 :
// Monte-Carlo move of the ftilde auxiliary variables (GENERIC noise only).
// Proposes a collective gaussian displacement of all ftilde, evaluates the
// resulting MIGEN energy and applies a Metropolis test.  On acceptance both
// ftilde_ and old_energy are updated in place so the caller can chain
// further moves; MCacceptFT_ counts accepted moves.
// NOTE: draws from random[0] — the per-replica RNG stream shared with the
// sigma moves, so call order relative to moveSigmas matters.
void Metainference::moveTilde(const std::vector<double> &mean_, double &old_energy)
{
  std::vector<double> new_ftilde(sigma_.size());
  new_ftilde = ftilde_;

  // change all tildes
  for(unsigned j=0; j<sigma_.size(); j++) {
    const double r3 = random[0].Gaussian();
    // proposal width is Dftilde_ times the standard error of the mean
    const double ds3 = Dftilde_*std::sqrt(sigma_mean2_[j])*r3;
    new_ftilde[j] = ftilde_[j] + ds3;
  }
  // calculate new energy
  double new_energy = getEnergyMIGEN(mean_,new_ftilde,sigma_,scale_,offset_);

  // accept or reject
  const double delta = ( new_energy - old_energy ) / kbt_;
  // if delta is negative always accept move
  if( delta <= 0.0 ) {
    old_energy = new_energy;
    ftilde_ = new_ftilde;
    MCacceptFT_++;
    // otherwise extract random number
  } else {
    const double s = random[0].RandU01();
    if( s < std::exp(-delta) ) {
      old_energy = new_energy;
      ftilde_ = new_ftilde;
      MCacceptFT_++;
    }
  }
}
1084 :
// Monte-Carlo move of the common scale and/or offset parameters.  The
// proposal uses random[1], which is seeded identically on every replica so
// that all replicas propose the same move; acceptance is based on the energy
// summed over all replicas, keeping scale_/offset_ in sync across them.
// On acceptance old_energy is updated in place and MCacceptScale_ counts it.
void Metainference::moveScaleOffset(const std::vector<double> &mean_, double &old_energy)
{
  double new_scale = scale_;

  if(doscale_) {
    if(scale_prior_==SC_FLAT) {
      const double r1 = random[1].Gaussian();
      const double ds1 = Dscale_*r1;
      new_scale += ds1;
      // check boundaries: reflect the proposal back inside [scale_min_,scale_max_]
      if(new_scale > scale_max_) {new_scale = 2.0 * scale_max_ - new_scale;}
      if(new_scale < scale_min_) {new_scale = 2.0 * scale_min_ - new_scale;}
    } else {
      // gaussian prior: drift towards scale_mu_ plus a random kick
      const double r1 = random[1].Gaussian();
      const double ds1 = 0.5*(scale_mu_-new_scale)+Dscale_*std::exp(1)/M_PI*r1;
      new_scale += ds1;
    }
  }

  double new_offset = offset_;

  if(dooffset_) {
    if(offset_prior_==SC_FLAT) {
      const double r1 = random[1].Gaussian();
      const double ds1 = Doffset_*r1;
      new_offset += ds1;
      // check boundaries: reflect the proposal back inside [offset_min_,offset_max_]
      if(new_offset > offset_max_) {new_offset = 2.0 * offset_max_ - new_offset;}
      if(new_offset < offset_min_) {new_offset = 2.0 * offset_min_ - new_offset;}
    } else {
      // gaussian prior: drift towards offset_mu_ plus a random kick
      const double r1 = random[1].Gaussian();
      const double ds1 = 0.5*(offset_mu_-new_offset)+Doffset_*std::exp(1)/M_PI*r1;
      new_offset += ds1;
    }
  }

  // calculate new energy with the proposed scale/offset, using the energy
  // function that matches the configured noise model
  double new_energy = 0.;

  switch(noise_type_) {
  case GAUSS:
    new_energy = getEnergyGJ(mean_,sigma_,new_scale,new_offset);
    break;
  case MGAUSS:
    new_energy = getEnergyGJE(mean_,sigma_,new_scale,new_offset);
    break;
  case OUTLIERS:
    new_energy = getEnergySP(mean_,sigma_,new_scale,new_offset);
    break;
  case MOUTLIERS:
    new_energy = getEnergySPE(mean_,sigma_,new_scale,new_offset);
    break;
  case GENERIC:
    new_energy = getEnergyMIGEN(mean_,ftilde_,sigma_,new_scale,new_offset);
    break;
  }

  // for the scale/offset we need to consider the total energy summed over
  // all replicas: only the inter-replica master ranks exchange the values,
  // then each intra-replica communicator broadcasts via Sum on zeros
  std::vector<double> totenergies(2);
  if(master) {
    totenergies[0] = old_energy;
    totenergies[1] = new_energy;
    if(nrep_>1) multi_sim_comm.Sum(totenergies);
  } else {
    totenergies[0] = 0;
    totenergies[1] = 0;
  }
  comm.Sum(totenergies);

  // accept or reject
  const double delta = ( totenergies[1] - totenergies[0] ) / kbt_;
  // if delta is negative always accept move
  if( delta <= 0.0 ) {
    old_energy = new_energy;
    scale_ = new_scale;
    offset_ = new_offset;
    MCacceptScale_++;
    // otherwise extract random number (same on every replica, see seeding)
  } else {
    double s = random[1].RandU01();
    if( s < std::exp(-delta) ) {
      old_energy = new_energy;
      scale_ = new_scale;
      offset_ = new_offset;
      MCacceptScale_++;
    }
  }
}
1173 :
// Monte-Carlo move of the uncertainty parameters sigma_.  With
// MCchunksize_>0 only a chunk of randomly pre-shuffled sigmas (selected via
// `indices`, chunk number `i`) is moved per call; otherwise all sigmas move.
// Sets breaknow=true when chunk i is past the end, so the caller stops its
// MC loop.  On acceptance sigma_ and old_energy are updated in place and
// MCaccept_ is incremented.  Uses the per-replica RNG stream random[0].
void Metainference::moveSigmas(const std::vector<double> &mean_, double &old_energy, const unsigned i, const std::vector<unsigned> &indices, bool &breaknow)
{
  std::vector<double> new_sigma(sigma_.size());
  new_sigma = sigma_;

  // change MCchunksize_ sigmas
  if (MCchunksize_ > 0) {
    if ((MCchunksize_ * i) >= sigma_.size()) {
      // This means we are not moving any sigma, so we should break immediately
      breaknow = true;
    }

    // change random sigmas
    for(unsigned j=0; j<MCchunksize_; j++) {
      const unsigned shuffle_index = j + MCchunksize_ * i;
      if (shuffle_index >= sigma_.size()) {
        // Going any further will segfault but we should still evaluate the sigmas we changed
        break;
      }
      const unsigned index = indices[shuffle_index];
      const double r2 = random[0].Gaussian();
      const double ds2 = Dsigma_[index]*r2;
      new_sigma[index] = sigma_[index] + ds2;
      // check boundaries: reflect back inside [sigma_min_,sigma_max_]
      if(new_sigma[index] > sigma_max_[index]) {new_sigma[index] = 2.0 * sigma_max_[index] - new_sigma[index];}
      if(new_sigma[index] < sigma_min_[index]) {new_sigma[index] = 2.0 * sigma_min_[index] - new_sigma[index];}
    }
  } else {
    // change all sigmas
    for(unsigned j=0; j<sigma_.size(); j++) {
      const double r2 = random[0].Gaussian();
      const double ds2 = Dsigma_[j]*r2;
      new_sigma[j] = sigma_[j] + ds2;
      // check boundaries: reflect back inside [sigma_min_,sigma_max_]
      if(new_sigma[j] > sigma_max_[j]) {new_sigma[j] = 2.0 * sigma_max_[j] - new_sigma[j];}
      if(new_sigma[j] < sigma_min_[j]) {new_sigma[j] = 2.0 * sigma_min_[j] - new_sigma[j];}
    }
  }

  if (breaknow) {
    // We didnt move any sigmas, so no sense in evaluating anything
    return;
  }

  // calculate new energy with the energy function matching the noise model
  double new_energy = 0.;
  switch(noise_type_) {
  case GAUSS:
    new_energy = getEnergyGJ(mean_,new_sigma,scale_,offset_);
    break;
  case MGAUSS:
    new_energy = getEnergyGJE(mean_,new_sigma,scale_,offset_);
    break;
  case OUTLIERS:
    new_energy = getEnergySP(mean_,new_sigma,scale_,offset_);
    break;
  case MOUTLIERS:
    new_energy = getEnergySPE(mean_,new_sigma,scale_,offset_);
    break;
  case GENERIC:
    new_energy = getEnergyMIGEN(mean_,ftilde_,new_sigma,scale_,offset_);
    break;
  }

  // accept or reject (plain Metropolis; sigmas are local to each replica)
  const double delta = ( new_energy - old_energy ) / kbt_;
  // if delta is negative always accept move
  if( delta <= 0.0 ) {
    old_energy = new_energy;
    sigma_ = new_sigma;
    MCaccept_++;
    // otherwise extract random number
  } else {
    const double s = random[0].RandU01();
    if( s < std::exp(-delta) ) {
      old_energy = new_energy;
      sigma_ = new_sigma;
      MCaccept_++;
    }
  }
}
1255 :
// One full Monte-Carlo sweep over the metainference parameters.
// 1) computes the current energy for the configured noise model;
// 2) unless this is a replica-exchange trial step, performs MCsteps_
//    iterations of: moveTilde (GENERIC only), moveScaleOffset (if sampled),
//    moveSigmas — in this fixed order, since the moves share RNG streams;
// 3) publishes acceptance ratios and sampled parameters as components;
// 4) sums the final energy over all replicas to obtain the total
//    metainference score, which is returned (identical on every rank).
double Metainference::doMonteCarlo(const std::vector<double> &mean_)
{
  // calculate old energy with the updated coordinates
  double old_energy=0.;

  switch(noise_type_) {
  case GAUSS:
    old_energy = getEnergyGJ(mean_,sigma_,scale_,offset_);
    break;
  case MGAUSS:
    old_energy = getEnergyGJE(mean_,sigma_,scale_,offset_);
    break;
  case OUTLIERS:
    old_energy = getEnergySP(mean_,sigma_,scale_,offset_);
    break;
  case MOUTLIERS:
    old_energy = getEnergySPE(mean_,sigma_,scale_,offset_);
    break;
  case GENERIC:
    old_energy = getEnergyMIGEN(mean_,ftilde_,sigma_,scale_,offset_);
    break;
  }

  // do not run MC if this is a replica-exchange trial
  if(!getExchangeStep()) {

    // Create std::vector of random sigma indices, used by moveSigmas to pick
    // which chunk of sigmas to move (only when MCchunksize_>0)
    std::vector<unsigned> indices;
    if (MCchunksize_ > 0) {
      for (unsigned j=0; j<sigma_.size(); j++) {
        indices.push_back(j);
      }
      random[2].Shuffle(indices);
    }
    bool breaknow = false;

    // cycle on MC steps
    for(unsigned i=0; i<MCsteps_; ++i) {
      MCtrial_++;
      // propose move for ftilde
      if(noise_type_==GENERIC) moveTilde(mean_, old_energy);
      // propose move for scale and/or offset
      if(doscale_||dooffset_) moveScaleOffset(mean_, old_energy);
      // propose move for sigma
      moveSigmas(mean_, old_energy, i, indices, breaknow);
      // exit from the loop if this is the case
      if(breaknow) break;
    }

    /* save the result of the sampling */
    /* ftilde */
    if(noise_type_==GENERIC) {
      double accept = static_cast<double>(MCacceptFT_) / static_cast<double>(MCtrial_);
      valueAcceptFT->set(accept);
      for(unsigned i=0; i<sigma_.size(); i++) valueFtilde[i]->set(ftilde_[i]);
    }
    /* scale and offset */
    if(doscale_ || doregres_zero_) valueScale->set(scale_);
    if(dooffset_) valueOffset->set(offset_);
    if(doscale_||dooffset_) {
      double accept = static_cast<double>(MCacceptScale_) / static_cast<double>(MCtrial_);
      valueAcceptScale->set(accept);
    }
    /* sigmas */
    for(unsigned i=0; i<sigma_.size(); i++) valueSigma[i]->set(sigma_[i]);
    double accept = static_cast<double>(MCaccept_) / static_cast<double>(MCtrial_);
    valueAccept->set(accept);
  }

  // here we sum the score over the replicas to get the full metainference score that we save as a bias
  if(master) {
    if(nrep_>1) multi_sim_comm.Sum(old_energy);
  } else {
    // non-master ranks contribute zero, then receive the total via comm.Sum
    old_energy=0;
  }
  comm.Sum(old_energy);

  // this is the energy with current coordinates and parameters
  return old_energy;
}
1336 :
1337 : /*
1338 : In the following energy-force functions we don't add the normalisation and the jeffreys priors
1339 : because they are not needed for the forces, the correct MetaInference energy is the one calculated
1340 : in the Monte-Carlo
1341 : */
1342 :
// Forces for NOISETYPE=OUTLIERS (single sigma): computes the derivative of
// the marginalised long-tailed likelihood with respect to each argument and
// applies it through setOutputForce; with reweighting it also sets the force
// on the bias argument and the "biasDer" component.  Normalisation/Jeffreys
// terms are omitted here since they do not depend on the arguments (see the
// comment block above these functions).
void Metainference::getEnergyForceSP(const std::vector<double> &mean, const std::vector<double> &dmean_x,
                                     const std::vector<double> &dmean_b)
{
  const double scale2 = scale_*scale_;
  // variance of the error of the mean and effective variance (as in getEnergySP)
  const double sm2 = sigma_mean2_[0];
  const double ss2 = sigma_[0]*sigma_[0] + scale2*sm2;
  std::vector<double> f(narg,0);

  if(master) {
    #pragma omp parallel num_threads(OpenMP::getNumThreads())
    {
      #pragma omp for
      for(unsigned i=0; i<narg; ++i) {
        const double dev = scale_*mean[i]-parameters[i]+offset_;
        const double a2 = 0.5*dev*dev + ss2;
        if(sm2 > 0.0) {
          // dit = 1/(1-exp(a2/sm2)) equals -exp(-a2/sm2)/(1-exp(-a2/sm2)),
          // folding the sign of the derivative of log(1-exp(-a2/sm2))
          const double t = std::exp(-a2/sm2);
          const double dt = 1./t;
          const double dit = 1./(1.-dt);
          f[i] = -scale_*dev*(dit/sm2 + 1./a2);
        }
        else {
          f[i] = -scale_*dev*(1./a2);
        }
      }
    }
    // collect contribution to forces and energy from other replicas
    if(nrep_>1) multi_sim_comm.Sum(&f[0],narg);
  }
  // intra-replica summation
  comm.Sum(&f[0],narg);

  double w_tmp = 0.;
  for(unsigned i=0; i<narg; ++i) {
    setOutputForce(i, kbt_*f[i]*dmean_x[i]);
    // chain rule through the dependence of the weighted mean on the bias
    w_tmp += kbt_*f[i]*dmean_b[i];
  }

  if(do_reweight_) {
    setOutputForce(narg, w_tmp);
    getPntrToComponent("biasDer")->set(-w_tmp);
  }
}
1386 :
// Forces for NOISETYPE=MOUTLIERS (one sigma per data point): same functional
// form as getEnergyForceSP but with per-datapoint variances.  Forces are
// computed on the inter-replica master ranks, summed across replicas, then
// broadcast within each replica via comm.Sum before being applied.
void Metainference::getEnergyForceSPE(const std::vector<double> &mean, const std::vector<double> &dmean_x,
                                      const std::vector<double> &dmean_b)
{
  const double scale2 = scale_*scale_;
  std::vector<double> f(narg,0);

  if(master) {
    #pragma omp parallel num_threads(OpenMP::getNumThreads())
    {
      #pragma omp for
      for(unsigned i=0; i<narg; ++i) {
        const double sm2 = sigma_mean2_[i];
        const double ss2 = sigma_[i]*sigma_[i] + scale2*sm2;
        const double dev = scale_*mean[i]-parameters[i]+offset_;
        const double a2 = 0.5*dev*dev + ss2;
        if(sm2 > 0.0) {
          // dit folds the sign of the derivative of log(1-exp(-a2/sm2)),
          // as in getEnergyForceSP
          const double t = std::exp(-a2/sm2);
          const double dt = 1./t;
          const double dit = 1./(1.-dt);
          f[i] = -scale_*dev*(dit/sm2 + 1./a2);
        }
        else {
          f[i] = -scale_*dev*(1./a2);
        }
      }
    }
    // collect contribution to forces and energy from other replicas
    if(nrep_>1) multi_sim_comm.Sum(&f[0],narg);
  }
  comm.Sum(&f[0],narg);

  double w_tmp = 0.;
  for(unsigned i=0; i<narg; ++i) {
    setOutputForce(i, kbt_ * dmean_x[i] * f[i]);
    // chain rule through the dependence of the weighted mean on the bias
    w_tmp += kbt_ * dmean_b[i] *f[i];
  }

  if(do_reweight_) {
    setOutputForce(narg, w_tmp);
    getPntrToComponent("biasDer")->set(-w_tmp);
  }
}
1429 :
// Forces for NOISETYPE=GAUSS (single sigma): quadratic energy, so the force
// on each argument is linear in the deviation.  The inverse effective
// variance is summed over replicas (computed on master ranks, then broadcast
// within each replica) before being used locally.
void Metainference::getEnergyForceGJ(const std::vector<double> &mean, const std::vector<double> &dmean_x,
                                     const std::vector<double> &dmean_b)
{
  const double scale2 = scale_*scale_;
  double inv_s2=0.;

  if(master) {
    inv_s2 = 1./(sigma_[0]*sigma_[0] + scale2*sigma_mean2_[0]);
    // accumulate the inverse variance over replicas
    if(nrep_>1) multi_sim_comm.Sum(inv_s2);
  }
  // non-master ranks start from zero, so this broadcasts the replica total
  comm.Sum(inv_s2);

  double w_tmp = 0.;
  #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(w_tmp)
  {
    #pragma omp for reduction( + : w_tmp)
    for(unsigned i=0; i<narg; ++i) {
      const double dev = scale_*mean[i]-parameters[i]+offset_;
      const double mult = dev*scale_*inv_s2;
      setOutputForce(i, -kbt_*dmean_x[i]*mult);
      // chain rule through the dependence of the weighted mean on the bias
      w_tmp += kbt_*dmean_b[i]*mult;
    }
  }

  if(do_reweight_) {
    setOutputForce(narg, -w_tmp);
    getPntrToComponent("biasDer")->set(w_tmp);
  }
}
1459 :
// Forces for NOISETYPE=MGAUSS (one sigma per data point): per-datapoint
// inverse effective variances are computed on the inter-replica master
// ranks, summed over replicas, broadcast within each replica, then used to
// apply linear restoring forces on every argument.
void Metainference::getEnergyForceGJE(const std::vector<double> &mean, const std::vector<double> &dmean_x,
                                      const std::vector<double> &dmean_b)
{
  const double scale2 = scale_*scale_;
  std::vector<double> inv_s2(sigma_.size(),0.);

  if(master) {
    for(unsigned i=0; i<sigma_.size(); ++i) inv_s2[i] = 1./(sigma_[i]*sigma_[i] + scale2*sigma_mean2_[i]);
    // accumulate the inverse variances over replicas
    if(nrep_>1) multi_sim_comm.Sum(&inv_s2[0],sigma_.size());
  }
  // non-master ranks start from zero, so this broadcasts the replica totals
  comm.Sum(&inv_s2[0],sigma_.size());

  double w_tmp = 0.;
  #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(w_tmp)
  {
    #pragma omp for reduction( + : w_tmp)
    for(unsigned i=0; i<narg; ++i) {
      const double dev = scale_*mean[i]-parameters[i]+offset_;
      const double mult = dev*scale_*inv_s2[i];
      setOutputForce(i, -kbt_*dmean_x[i]*mult);
      // chain rule through the dependence of the weighted mean on the bias
      w_tmp += kbt_*dmean_b[i]*mult;
    }
  }

  if(do_reweight_) {
    setOutputForce(narg, -w_tmp);
    getPntrToComponent("biasDer")->set(w_tmp);
  }
}
1489 :
// Forces for NOISETYPE=GENERIC: only the gaussian coupling between the
// calculated mean and the auxiliary ftilde variables depends on the
// arguments, so the force involves (mean - ftilde)/sigma_mean2_ only.
// Deviations are computed on master ranks, summed over replicas, and
// broadcast within each replica before applying the forces.
void Metainference::getEnergyForceMIGEN(const std::vector<double> &mean, const std::vector<double> &dmean_x, const std::vector<double> &dmean_b)
{
  std::vector<double> inv_s2(sigma_.size(),0.);
  std::vector<double> dev(sigma_.size(),0.);
  std::vector<double> dev2(sigma_.size(),0.);

  for(unsigned i=0; i<sigma_.size(); ++i) {
    inv_s2[i] = 1./sigma_mean2_[i];
    if(master) {
      dev[i] = (mean[i]-ftilde_[i]);
      dev2[i] = dev[i]*dev[i];
    }
  }
  if(master&&nrep_>1) {
    // accumulate deviations (and their squares) over replicas
    multi_sim_comm.Sum(&dev[0],dev.size());
    multi_sim_comm.Sum(&dev2[0],dev2.size());
  }
  // non-master ranks start from zero, so this broadcasts the replica totals
  comm.Sum(&dev[0],dev.size());
  comm.Sum(&dev2[0],dev2.size());

  double dene_b = 0.;
  #pragma omp parallel num_threads(OpenMP::getNumThreads()) shared(dene_b)
  {
    #pragma omp for reduction( + : dene_b)
    for(unsigned i=0; i<narg; ++i) {
      const double dene_x = kbt_*inv_s2[i]*dmean_x[i]*dev[i];
      // chain rule through the dependence of the weighted mean on the bias
      dene_b += kbt_*inv_s2[i]*dmean_b[i]*dev[i];
      setOutputForce(i, -dene_x);
    }
  }

  if(do_reweight_) {
    setOutputForce(narg, -dene_b);
    getPntrToComponent("biasDer")->set(dene_b);
  }
}
1526 :
void Metainference::get_weights(const unsigned iselect, double &weight, double &norm, double &neff)
{
  // Compute this replica's statistical weight, the normalization over all
  // replicas and the effective number of replicas (Kish sample size).
  // With REWEIGHT, weights come from the exponential of a time-averaged
  // external bias (last argument); otherwise all replicas count equally.
  // NOTE(review): in the reweighting branch `norm` is accumulated with `+=`,
  // so the caller is expected to pass norm initialized to 0 — confirm.
  const double dnrep = static_cast<double>(nrep_);
  // calculate the weights either from BIAS
  if(do_reweight_) {
    // gather the current bias of every replica: each master rank fills its
    // own slot, the inter-replica Sum assembles the full vector, and the
    // intra-replica Sum broadcasts it to the non-master ranks
    std::vector<double> bias(nrep_,0);
    if(master) {
      bias[replica_] = getArgument(narg);
      if(nrep_>1) multi_sim_comm.Sum(&bias[0], nrep_);
    }
    comm.Sum(&bias[0], nrep_);

    // accumulate weights: exponential moving average of the bias with
    // decay 1/average_weights_stride_; on the first call (per selector)
    // the average is seeded with the instantaneous bias
    const double decay = 1./static_cast<double> (average_weights_stride_);
    if(!firstTimeW[iselect]) {
      for(unsigned i=0; i<nrep_; ++i) {
        const double delta=bias[i]-average_weights_[iselect][i];
        average_weights_[iselect][i]+=decay*delta;
      }
    } else {
      firstTimeW[iselect] = false;
      for(unsigned i=0; i<nrep_; ++i) average_weights_[iselect][i] = bias[i];
    }

    // set average back into bias and set norm to one
    // (subtracting the maximum before exponentiating avoids overflow)
    const double maxbias = *(std::max_element(average_weights_[iselect].begin(), average_weights_[iselect].end()));
    for(unsigned i=0; i<nrep_; ++i) bias[i] = std::exp((average_weights_[iselect][i]-maxbias)/kbt_);
    // set local weight, norm and weight variance
    weight = bias[replica_];
    double w2=0.;
    for(unsigned i=0; i<nrep_; ++i) {
      w2 += bias[i]*bias[i];
      norm += bias[i];
    }
    // effective sample size: (sum w)^2 / sum w^2
    neff = norm*norm/w2;
    getPntrToComponent("weight")->set(weight/norm);
  } else {
    // or arithmetic ones: every replica contributes equally
    neff = dnrep;
    weight = 1.0;
    norm = dnrep;
  }
  getPntrToComponent("neff")->set(neff);
}
1571 :
void Metainference::get_sigma_mean(const unsigned iselect, const double weight, const double norm, const double neff, const std::vector<double> &mean)
{
  // Update sigma_mean2_ (the squared standard error of the ensemble mean),
  // either from a running on-the-fly estimate (OPTSIGMAMEAN) or from the
  // stored history set at initialization/restart.
  const double dnrep = static_cast<double>(nrep_);
  std::vector<double> sigma_mean2_tmp(sigma_mean2_.size(), 0.);

  if(do_optsigmamean_>0) {
    // remove first entry of the history std::vector
    // (fixed-size sliding window of length optsigmamean_stride_)
    if(sigma_mean2_last_[iselect][0].size()==optsigmamean_stride_&&optsigmamean_stride_>0)
      for(unsigned i=0; i<narg; ++i) sigma_mean2_last_[iselect][i].erase(sigma_mean2_last_[iselect][i].begin());
    /* this is the current estimate of sigma mean for each argument
       there is one of this per argument in any case because it is
       the maximum among these to be used in case of GAUSS/OUTLIER */
    // weighted squared deviation of each replica from the ensemble mean,
    // reduced over replicas and normalized by (neff-1)*norm below
    std::vector<double> sigma_mean2_now(narg,0);
    if(master) {
      for(unsigned i=0; i<narg; ++i) sigma_mean2_now[i] = weight*(getArgument(i)-mean[i])*(getArgument(i)-mean[i]);
      if(nrep_>1) multi_sim_comm.Sum(&sigma_mean2_now[0], narg);
    }
    comm.Sum(&sigma_mean2_now[0], narg);
    for(unsigned i=0; i<narg; ++i) sigma_mean2_now[i] *= 1.0/(neff-1.)/norm;

    // add sigma_mean2 to history; with stride<=0 keep a single running maximum
    if(optsigmamean_stride_>0) {
      for(unsigned i=0; i<narg; ++i) sigma_mean2_last_[iselect][i].push_back(sigma_mean2_now[i]);
    } else {
      for(unsigned i=0; i<narg; ++i) if(sigma_mean2_now[i] > sigma_mean2_last_[iselect][i][0]) sigma_mean2_last_[iselect][i][0] = sigma_mean2_now[i];
    }

    if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
      for(unsigned i=0; i<narg; ++i) {
        /* set to the maximum in history std::vector */
        sigma_mean2_tmp[i] = *max_element(sigma_mean2_last_[iselect][i].begin(), sigma_mean2_last_[iselect][i].end());
        /* the standard error of the mean */
        valueSigmaMean[i]->set(std::sqrt(sigma_mean2_tmp[i]));
        // GENERIC: sigma is bounded from below by the standard error
        if(noise_type_==GENERIC) {
          sigma_min_[i] = std::sqrt(sigma_mean2_tmp[i]);
          if(sigma_[i] < sigma_min_[i]) sigma_[i] = sigma_min_[i];
        }
      }
    } else if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
      // find maximum for each data point
      std::vector <double> max_values;
      for(unsigned i=0; i<narg; ++i) max_values.push_back(*max_element(sigma_mean2_last_[iselect][i].begin(), sigma_mean2_last_[iselect][i].end()));
      // find maximum across data points
      const double max_now = *max_element(max_values.begin(), max_values.end());
      // set new value (single global sigma_mean for these noise models)
      sigma_mean2_tmp[0] = max_now;
      valueSigmaMean[0]->set(std::sqrt(sigma_mean2_tmp[0]));
    }
    // endif sigma mean optimization
    // start sigma max optimization
    // NOTE(review): the inner loop below re-declares `i`, shadowing the outer
    // loop variable, and the N_optimized_step_ check therefore runs once per
    // outer iteration; also sigma_max_est_ is compared against the squared
    // quantity sigma_mean2_tmp — presumably intentional (sqrt taken when
    // sigma_max_ is finally set), but worth confirming.
    if(do_optsigmamean_>1&&!sigmamax_opt_done_) {
      for(unsigned i=0; i<sigma_max_.size(); i++) {
        if(sigma_max_est_[i]<sigma_mean2_tmp[i]&&optimized_step_>optsigmamean_stride_) sigma_max_est_[i]=sigma_mean2_tmp[i];
        // ready to set once and for all the value of sigma_max
        if(optimized_step_==N_optimized_step_) {
          sigmamax_opt_done_=true;
          for(unsigned i=0; i<sigma_max_.size(); i++) {
            sigma_max_[i]=std::sqrt(sigma_max_est_[i]*dnrep);
            Dsigma_[i] = 0.05*(sigma_max_[i] - sigma_min_[i]);
            if(sigma_[i]>sigma_max_[i]) sigma_[i]=sigma_max_[i];
          }
        }
      }
      optimized_step_++;
    }
    // end sigma max optimization
  } else {
    // no optimization: use the stored values (from input or restart)
    if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
      for(unsigned i=0; i<narg; ++i) {
        sigma_mean2_tmp[i] = sigma_mean2_last_[iselect][i][0];
        valueSigmaMean[i]->set(std::sqrt(sigma_mean2_tmp[i]));
      }
    } else if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
      sigma_mean2_tmp[0] = sigma_mean2_last_[iselect][0][0];
      valueSigmaMean[0]->set(std::sqrt(sigma_mean2_tmp[0]));
    }
  }

  sigma_mean2_ = sigma_mean2_tmp;
}
1652 :
1653 178 : void Metainference::replica_averaging(const double weight, const double norm, std::vector<double> &mean, std::vector<double> &dmean_b)
1654 : {
1655 178 : if(master) {
1656 1660 : for(unsigned i=0; i<narg; ++i) mean[i] = weight/norm*getArgument(i);
1657 104 : if(nrep_>1) multi_sim_comm.Sum(&mean[0], narg);
1658 : }
1659 178 : comm.Sum(&mean[0], narg);
1660 : // set the derivative of the mean with respect to the bias
1661 3200 : for(unsigned i=0; i<narg; ++i) dmean_b[i] = weight/norm/kbt_*(getArgument(i)-mean[i])/static_cast<double>(average_weights_stride_);
1662 :
1663 : // this is only for generic metainference
1664 178 : if(firstTime) {ftilde_ = mean; firstTime = false;}
1665 178 : }
1666 :
1667 0 : void Metainference::do_regression_zero(const std::vector<double> &mean)
1668 : {
1669 : // parameters[i] = scale_ * mean[i]: find scale_ with linear regression
1670 : double num = 0.0;
1671 : double den = 0.0;
1672 0 : for(unsigned i=0; i<parameters.size(); ++i) {
1673 0 : num += mean[i] * parameters[i];
1674 0 : den += mean[i] * mean[i];
1675 : }
1676 0 : if(den>0) {
1677 0 : scale_ = num / den;
1678 : } else {
1679 0 : scale_ = 1.0;
1680 : }
1681 0 : }
1682 :
1683 178 : void Metainference::calculate()
1684 : {
1685 : // get step
1686 178 : const long long int step = getStep();
1687 :
1688 : unsigned iselect = 0;
1689 : // set the value of selector for REM-like stuff
1690 178 : if(selector_.length()>0) iselect = static_cast<unsigned>(plumed.passMap[selector_]);
1691 :
1692 : /* 1) collect weights */
1693 178 : double weight = 0.;
1694 178 : double neff = 0.;
1695 178 : double norm = 0.;
1696 178 : get_weights(iselect, weight, norm, neff);
1697 :
1698 : /* 2) calculate average */
1699 178 : std::vector<double> mean(narg,0);
1700 : // this is the derivative of the mean with respect to the argument
1701 178 : std::vector<double> dmean_x(narg,weight/norm);
1702 : // this is the derivative of the mean with respect to the bias
1703 178 : std::vector<double> dmean_b(narg,0);
1704 : // calculate it
1705 178 : replica_averaging(weight, norm, mean, dmean_b);
1706 :
1707 : /* 3) calculates parameters */
1708 178 : get_sigma_mean(iselect, weight, norm, neff, mean);
1709 :
1710 : // in case of regression with zero intercept, calculate scale
1711 178 : if(doregres_zero_ && step%nregres_zero_==0) do_regression_zero(mean);
1712 :
1713 : /* 4) run monte carlo */
1714 178 : double ene = doMonteCarlo(mean);
1715 :
1716 : // calculate bias and forces
1717 178 : switch(noise_type_) {
1718 12 : case GAUSS:
1719 12 : getEnergyForceGJ(mean, dmean_x, dmean_b);
1720 : break;
1721 52 : case MGAUSS:
1722 52 : getEnergyForceGJE(mean, dmean_x, dmean_b);
1723 : break;
1724 54 : case OUTLIERS:
1725 54 : getEnergyForceSP(mean, dmean_x, dmean_b);
1726 : break;
1727 48 : case MOUTLIERS:
1728 48 : getEnergyForceSPE(mean, dmean_x, dmean_b);
1729 : break;
1730 12 : case GENERIC:
1731 12 : getEnergyForceMIGEN(mean, dmean_x, dmean_b);
1732 : break;
1733 : }
1734 :
1735 178 : setBias(ene);
1736 178 : }
1737 :
void Metainference::writeStatus()
{
  // Dump the full restart state (sigma_mean history, sigmas, sigma_max,
  // ftilde for GENERIC noise, scale/offset and per-replica average weights)
  // to the status file. The file is rewound first so it always contains a
  // single, current snapshot.
  sfile_.rewind();
  sfile_.printField("time",getTimeStep()*getStep());
  //nsel
  for(unsigned i=0; i<sigma_mean2_last_.size(); i++) {
    std::string msg_i,msg_j;
    Tools::convert(i,msg_i);
    std::vector <double> max_values;
    //narg
    for(unsigned j=0; j<narg; ++j) {
      Tools::convert(j,msg_j);
      std::string msg = msg_i+"_"+msg_j;
      if(noise_type_==MGAUSS||noise_type_==MOUTLIERS||noise_type_==GENERIC) {
        // one sigma_mean per data point: write the max over the history window
        sfile_.printField("sigmaMean_"+msg,std::sqrt(*max_element(sigma_mean2_last_[i][j].begin(), sigma_mean2_last_[i][j].end())));
      } else {
        // find maximum for each data point (single global sigma_mean models)
        max_values.push_back(*max_element(sigma_mean2_last_[i][j].begin(), sigma_mean2_last_[i][j].end()));
      }
    }
    if(noise_type_==GAUSS||noise_type_==OUTLIERS) {
      // find maximum across data points
      const double max_now = std::sqrt(*max_element(max_values.begin(), max_values.end()));
      Tools::convert(0,msg_j);
      std::string msg = msg_i+"_"+msg_j;
      sfile_.printField("sigmaMean_"+msg, max_now);
    }
  }
  // current noise parameters, one per data point (or one overall)
  for(unsigned i=0; i<sigma_.size(); ++i) {
    std::string msg;
    Tools::convert(i,msg);
    sfile_.printField("sigma_"+msg,sigma_[i]);
  }
  // upper bounds used by the sigma Monte Carlo moves
  for(unsigned i=0; i<sigma_max_.size(); ++i) {
    std::string msg;
    Tools::convert(i,msg);
    sfile_.printField("sigma_max_"+msg,sigma_max_[i]);
  }
  // auxiliary variables of the GENERIC (Gibbs) noise model
  if(noise_type_==GENERIC) {
    for(unsigned i=0; i<ftilde_.size(); ++i) {
      std::string msg;
      Tools::convert(i,msg);
      sfile_.printField("ftilde_"+msg,ftilde_[i]);
    }
  }
  sfile_.printField("scale0_",scale_);
  sfile_.printField("offset0_",offset_);
  // time-averaged bias of this replica, one entry per selector value
  for(unsigned i=0; i<average_weights_.size(); i++) {
    std::string msg_i;
    Tools::convert(i,msg_i);
    sfile_.printField("weight_"+msg_i,average_weights_[i][replica_]);
  }
  sfile_.printField();
  sfile_.flush();
}
1793 :
1794 178 : void Metainference::update() {
1795 : // write status file
1796 178 : if(write_stride_>0&& (getStep()%write_stride_==0 || getCPT()) ) writeStatus();
1797 178 : }
1798 :
1799 : }
1800 : }
1801 :
|