Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2020 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionWithValue.h"
23 : #include "core/ActionWithArguments.h"
24 : #include "core/ActionRegister.h"
25 : #include "tools/ConjugateGradient.h"
26 : #include "tools/SwitchingFunction.h"
27 : #include "gridtools/GridSearch.h"
28 : #include "SMACOF.h"
29 :
30 : namespace PLMD {
31 : namespace dimred {
32 :
33 : //+PLUMEDOC DIMRED ARRANGE_POINTS
34 : /*
35 : Arrange points in a low dimensional space so that the (transformed) distances between points in the low dimensional space match the dissimilarities provided in an input matrix.
36 :
37 : \par Examples
38 :
39 : */
40 : //+ENDPLUMEDOC
41 :
42 : class ArrangePoints :
43 : public ActionWithValue,
44 : public ActionWithArguments {
45 : private:
46 : unsigned dimout, maxiter, ncycles, current_index;
47 : double cgtol, gbuf;
48 : std::vector<unsigned> npoints, nfgrid;
49 : std::vector<double> mypos;
50 : double smacof_tol, smacof_reg;
51 : int dist_target;
52 : enum {conjgrad,pointwise,smacof} mintype;
53 : std::vector<SwitchingFunction> switchingFunction;
54 : void checkInputMatrix( const std::string& key, const unsigned& nvals, const std::vector<Value*>& mat ) const ;
55 : double recalculateSmacofWeights( const std::vector<double>& p, SMACOF& mysmacof ) const ;
56 : protected:
57 : double calculateStress( const std::vector<double>& p, std::vector<double>& d );
58 : double calculateFullStress( const std::vector<double>& p, std::vector<double>& d );
59 : public:
60 : static void registerKeywords( Keywords& keys );
61 : ArrangePoints( const ActionOptions& );
62 0 : unsigned getNumberOfDerivatives() override {
63 0 : return 0;
64 : }
65 : void prepare() override ;
66 : void calculate() override ;
67 : virtual void optimize( std::vector<double>& pos );
68 2 : void apply() override {}
69 : };
70 :
71 : PLUMED_REGISTER_ACTION(ArrangePoints,"ARRANGE_POINTS")
72 :
73 12 : void ArrangePoints::registerKeywords( Keywords& keys ) {
74 12 : Action::registerKeywords( keys );
75 12 : ActionWithValue::registerKeywords( keys );
76 12 : ActionWithArguments::registerKeywords( keys );
77 24 : keys.addInputKeyword("compulsory","ARG","vector","the initial positions for the projections");
78 24 : keys.addInputKeyword("numbered","TARGET","matrix","the matrix of target quantities that you would like to match");
79 12 : keys.add("numbered","FUNC","a function that is applied on the distances between the points in the low dimensional space");
80 24 : keys.addInputKeyword("numbered","WEIGHTS","matrix","the matrix with the weights of the target quantities");
81 12 : keys.add("compulsory","MINTYPE","conjgrad","the method to use for the minimisation");
82 12 : keys.add("compulsory","MAXITER","1000","maximum number of optimization cycles for optimisation algorithms");
83 12 : keys.add("compulsory","CGTOL","1E-6","the tolerance for the conjugate gradient minimization");
84 12 : keys.add("compulsory","NCYCLES","5","the number of cycles of global optimization to attempt");
85 12 : keys.add("compulsory","BUFFER","1.1","grid extent for search is (max projection - minimum projection) multiplied by this value");
86 12 : keys.add("compulsory","CGRID_SIZE","10","number of points to use in each grid direction");
87 12 : keys.add("compulsory","FGRID_SIZE","0","interpolate the grid onto this number of points -- only works in 2D");
88 12 : keys.add("compulsory","SMACTOL","1E-4","the tolerance for the smacof algorithm");
89 12 : keys.add("compulsory","SMACREG","0.001","this is used to ensure that we don't divide by zero when updating weights for SMACOF algorithm");
90 24 : keys.addOutputComponent("coord","default","vector","the coordinates of the points in the low dimensional space");
91 12 : }
92 :
93 :
94 5 : ArrangePoints::ArrangePoints( const ActionOptions& ao ) :
95 : Action(ao),
96 : ActionWithValue(ao),
97 : ActionWithArguments(ao),
98 5 : current_index(0),
99 5 : dist_target(-1) {
100 5 : dimout = getNumberOfArguments();
101 5 : std::vector<unsigned> shape(1);
102 5 : shape[0]=getPntrToArgument(0)->getNumberOfValues();
103 15 : for(unsigned i=0; i<getNumberOfArguments(); ++i) {
104 10 : if( shape[0]!=getPntrToArgument(i)->getNumberOfValues() ) {
105 0 : error("mismatch between sizes of input coordinates");
106 : }
107 : std::string num;
108 10 : Tools::convert( i+1, num );
109 10 : addComponent( "coord-" + num, shape );
110 20 : componentIsNotPeriodic( "coord-" + num );
111 10 : getPntrToArgument(i)->buildDataStore();
112 : }
113 5 : std::vector<Value*> args( getArguments() ), target, weights;
114 : std::string sfd, errors;
115 : // Read in target "distances" and target weights
116 5 : for(unsigned i=1;; ++i) {
117 11 : target.resize(0);
118 22 : if( !parseArgumentList("TARGET",i,target) ) {
119 : break;
120 : }
121 : std::string inum;
122 6 : Tools::convert( i, inum );
123 6 : checkInputMatrix( "TARGET" + inum, shape[0], target );
124 12 : if( !parseArgumentList("WEIGHTS",i,weights) ) {
125 0 : error("missing WEIGHTS" + inum + " keyword in input");
126 : }
127 12 : checkInputMatrix( "WEIGHTS" + inum, shape[0], weights );
128 6 : args.push_back( target[0] );
129 6 : args.push_back( weights[0] );
130 6 : bool has_sf = parseNumbered("FUNC",i,sfd);
131 6 : switchingFunction.push_back( SwitchingFunction() );
132 6 : if( !has_sf ) {
133 1 : switchingFunction[i-1].set( "CUSTOM FUNC=1-sqrt(x2) R_0=1.0", errors );
134 1 : dist_target=i-1;
135 : } else {
136 5 : switchingFunction[i-1].set( sfd, errors );
137 5 : if( errors.length()!=0 ) {
138 0 : error("problem reading switching function description " + errors);
139 : }
140 : }
141 6 : log.printf(" %sth term seeks to match tranformed distances with those in matrix %s \n", inum.c_str(), target[0]->getName().c_str() );
142 6 : log.printf(" in %sth term distances are transformed by 1-switching function with r_0=%s \n", inum.c_str(), switchingFunction[i-1].description().c_str() );
143 6 : log.printf(" in %sth term weights of matrix elements in stress function are given by %s \n", inum.c_str(), weights[0]->getName().c_str() );
144 6 : }
145 : std::string mtype;
146 10 : parse("MINTYPE",mtype);
147 5 : if( mtype=="conjgrad" ) {
148 3 : mintype=conjgrad;
149 3 : log.printf(" minimimising stress function using conjugate gradients\n");
150 2 : } else if( mtype=="pointwise") {
151 1 : mintype=pointwise;
152 1 : log.printf(" minimimising stress function using pointwise global optimisation\n");
153 1 : npoints.resize(dimout);
154 1 : nfgrid.resize(dimout);
155 1 : parseVector("CGRID_SIZE",npoints);
156 1 : parse("BUFFER",gbuf);
157 1 : parse("NCYCLES",ncycles);
158 2 : parseVector("FGRID_SIZE",nfgrid);
159 1 : if( nfgrid[0]!=0 && dimout!=2 ) {
160 0 : error("interpolation only works in two dimensions");
161 : }
162 1 : log.printf(" doing %u cycles of global optimization sweeps\n",ncycles);
163 1 : log.printf(" using coarse grid of points that is %u",npoints[0]);
164 2 : for(unsigned j=1; j<npoints.size(); ++j) {
165 1 : log.printf(" by %u",npoints[j]);
166 : }
167 1 : log.printf("\n grid is %f times larger than the difference between the position of the minimum and maximum projection \n",gbuf);
168 1 : if( nfgrid[0]>0 ) {
169 1 : log.printf(" interpolating stress onto grid of points that is %u",nfgrid[0]);
170 2 : for(unsigned j=1; j<nfgrid.size(); ++j) {
171 1 : log.printf(" by %u",nfgrid[j]);
172 : }
173 1 : log.printf("\n");
174 : }
175 1 : } else if( mtype=="smacof" ) {
176 1 : mintype=smacof;
177 1 : if( dist_target<0 ) {
178 0 : error("one of targets must be distances in order to use smacof");
179 : }
180 1 : log.printf(" minimising stress fucntion using smacof\n");
181 1 : parse("SMACTOL",smacof_tol);
182 1 : parse("SMACREG",smacof_reg);
183 1 : log.printf(" tolerance for smacof algorithms equals %f \n", smacof_tol);
184 1 : log.printf(" using %f as regularisation parameter for weights in smacof algorithm\n", smacof_reg);
185 : } else {
186 0 : error("invalid MINTYPE");
187 : }
188 5 : if( mintype!=smacof) {
189 4 : parse("CGTOL",cgtol);
190 4 : log.printf(" tolerance for conjugate gradient algorithm equals %f \n",cgtol);
191 : }
192 5 : parse("MAXITER",maxiter);
193 5 : log.printf(" maximum number of iterations for minimimization algorithms equals %d \n", maxiter );
194 5 : requestArguments( args );
195 5 : checkRead();
196 5 : }
197 :
198 12 : void ArrangePoints::checkInputMatrix( const std::string& key, const unsigned& nvals, const std::vector<Value*>& mat ) const {
199 12 : mat[0]->buildDataStore();
200 12 : if( mat.size()!=1 ) {
201 0 : error("should only be one value in input to " + key );
202 : }
203 12 : if( mat[0]->getRank()!=2 || mat[0]->hasDerivatives() ) {
204 0 : error("input to " + key + " keyword should be a matrix");
205 : }
206 12 : if( mat[0]->getShape()[0]!=nvals || mat[0]->getShape()[1]!=nvals ) {
207 0 : error("input to " + key + " keyword has the wrong size");
208 : }
209 12 : }
210 :
211 24600 : double ArrangePoints::calculateStress( const std::vector<double>& p, std::vector<double>& d ) {
212 : double stress=0;
213 73800 : for(unsigned i=0; i<p.size(); ++i) {
214 49200 : d[i]=0.0;
215 : }
216 24600 : std::vector<double> dtmp(dimout);
217 24600 : std::vector<unsigned> shape( getPntrToArgument( dimout )->getShape() );
218 24600 : unsigned targi=shape[0]*current_index;
219 24600 : unsigned nmatrices = ( getNumberOfArguments() - dimout ) / 2;
220 2484600 : for(unsigned i=0; i<shape[0]; ++i) {
221 2460000 : if( i==current_index ) {
222 24600 : continue ;
223 : }
224 : // Calculate distance in low dimensional space
225 : double dd2=0;
226 7306200 : for(unsigned k=0; k<dimout; ++k) {
227 4870800 : dtmp[k]=p[k] - mypos[dimout*i+k];
228 4870800 : dd2+=dtmp[k]*dtmp[k];
229 : }
230 :
231 4870800 : for(unsigned k=0; k<nmatrices; ++k ) {
232 : // Now do transformations and calculate differences
233 2435400 : double df, fd = 1. - switchingFunction[k].calculateSqr( dd2, df );
234 : // Get the weight for this connection
235 : double weight = 0;
236 245975400 : for(unsigned j=0; j<shape[0]; ++j) {
237 243540000 : weight += getPntrToArgument( dimout + 2*k + 1 )->get( shape[0]*i+j );
238 : }
239 : // Get the difference for the connection
240 2435400 : double fdiff = fd - getPntrToArgument( dimout + 2*k )->get( targi+i );
241 : // Calculate derivatives
242 2435400 : double pref = -2.*weight*fdiff*df;
243 7306200 : for(unsigned n=0; n<dimout; ++n) {
244 4870800 : d[n] += pref*dtmp[n];
245 : }
246 : // Accumulate the total stress
247 2435400 : stress += weight*fdiff*fdiff;
248 : }
249 : }
250 24600 : return stress;
251 : }
252 :
253 1932 : double ArrangePoints::calculateFullStress( const std::vector<double>& p, std::vector<double>& d ) {
254 : // Zero derivative and stress accumulators
255 387932 : for(unsigned i=0; i<p.size(); ++i) {
256 386000 : d[i]=0.0;
257 : }
258 : double stress=0;
259 1932 : std::vector<double> dtmp( dimout );
260 :
261 1932 : unsigned nmatrices = ( getNumberOfArguments() - dimout ) / 2;
262 1932 : std::vector<unsigned> shape( getPntrToArgument( dimout )->getShape() );
263 193000 : for(unsigned i=1; i<shape[0]; ++i) {
264 14134568 : for(unsigned j=0; j<i; ++j) {
265 : // Calculate distance in low dimensional space
266 : double dd2=0;
267 41830500 : for(unsigned k=0; k<dimout; ++k) {
268 27887000 : dtmp[k]=p[dimout*i+k] - p[dimout*j+k];
269 27887000 : dd2+=dtmp[k]*dtmp[k];
270 : }
271 :
272 27887000 : for(unsigned k=0; k<nmatrices; ++k ) {
273 : // Now do transformations and calculate differences
274 13943500 : double df, fd = 1. - switchingFunction[k].calculateSqr( dd2, df );
275 : // Get the weight for this connection
276 13943500 : double weight = getPntrToArgument( dimout + 2*k + 1 )->get( shape[0]*i+j );
277 : // Get the difference for the connection
278 13943500 : double fdiff = fd - getPntrToArgument( dimout + 2*k )->get( shape[0]*i+j );
279 : // Calculate derivatives
280 13943500 : double pref = -2.*weight*fdiff*df;
281 41830500 : for(unsigned n=0; n<dimout; ++n) {
282 27887000 : double dterm=pref*dtmp[n];
283 27887000 : d[dimout*i+n]+=dterm;
284 27887000 : d[dimout*j+n]-=dterm;
285 : }
286 : // Accumulate the total stress
287 13943500 : stress += weight*fdiff*fdiff;
288 : }
289 : }
290 : }
291 1932 : return stress;
292 : }
293 :
294 2 : double ArrangePoints::recalculateSmacofWeights( const std::vector<double>& p, SMACOF& mysmacof ) const {
295 : double stress=0, totalWeight=0;
296 2 : unsigned nmatrices = ( getNumberOfArguments() - dimout ) / 2;
297 2 : std::vector<unsigned> shape( getPntrToArgument( dimout )->getShape() );
298 1000 : for(unsigned i=1; i<shape[0]; ++i) {
299 250498 : for(unsigned j=0; j<i; ++j) {
300 : // Calculate distance in low dimensional space
301 : double dd2=0;
302 748500 : for(unsigned k=0; k<dimout; ++k) {
303 499000 : double dtmp=p[dimout*i+k] - p[dimout*j+k];
304 499000 : dd2+=dtmp*dtmp;
305 : }
306 : // Calculate difference between target difference and true difference
307 249500 : double wval=0, dd1 = sqrt(dd2);
308 249500 : double diff = mysmacof.getDistance(i,j) - dd1;
309 :
310 748500 : for(unsigned k=0; k<nmatrices; ++k ) {
311 : // Don't need to do anything for distances we are matching
312 499000 : if( k==dist_target ) {
313 249500 : continue;
314 : }
315 : // Now do transformations and calculate differences
316 249500 : double df, fd = 1. - switchingFunction[k].calculateSqr( dd2, df );
317 : // Get the weight for this connection
318 249500 : double weight = getPntrToArgument( dimout + 2*k + 1 )->get( shape[0]*i+j );
319 : // Get the difference for the connection
320 249500 : double fdiff = getPntrToArgument( dimout + 2*k )->get( shape[0]*i+j ) - fd;
321 : // Now set the weight if difference in distance is larger than regularisation parameter
322 249500 : if( fabs(diff)>smacof_reg ) {
323 249403 : wval -= weight*fdiff*df*dd1 / diff;
324 : }
325 : // And the total stress and weights
326 249500 : stress += weight*fdiff*fdiff;
327 249500 : totalWeight += weight;
328 : }
329 249500 : mysmacof.setWeight( j, i, wval );
330 249500 : mysmacof.setWeight( i, j, wval );
331 : }
332 : }
333 2 : return stress / totalWeight;
334 : }
335 :
336 7 : void ArrangePoints::optimize( std::vector<double>& pos ) {
337 : ConjugateGradient<ArrangePoints> mycgminimise( this );
338 7 : if( mintype==conjgrad ) {
339 5 : mycgminimise.minimise( cgtol, pos, &ArrangePoints::calculateFullStress );
340 2 : } else if( mintype==pointwise ) {
341 1 : unsigned nvals=getPntrToArgument( dimout )->getShape()[0];
342 1 : std::vector<double> gmin( dimout ), gmax( dimout ), mypoint( dimout );
343 : // Find the extent of the grid
344 3 : for(unsigned j=0; j<dimout; ++j) {
345 2 : gmin[j]=gmax[j]=pos[j];
346 : }
347 100 : for(unsigned i=1; i<nvals; ++i) {
348 297 : for(unsigned j=0; j<dimout; ++j) {
349 198 : if( pos[dimout*i+j] < gmin[j] ) {
350 7 : gmin[j] = pos[dimout*i+j];
351 : }
352 198 : if( pos[dimout*i+j] > gmax[j] ) {
353 8 : gmax[j] = pos[dimout*i+j];
354 : }
355 : }
356 : }
357 3 : for(unsigned j=0; j<dimout; ++j) {
358 2 : double gbuffer = 0.5*gbuf*( gmax[j]-gmin[j] ) - 0.5*( gmax[j]- gmin[j] );
359 2 : gmin[j]-=gbuffer;
360 2 : gmax[j]+=gbuffer;
361 : }
362 1 : mypos.resize( pos.size() );
363 201 : for(unsigned i=0; i<mypos.size(); ++i) {
364 200 : mypos[i] = pos[i];
365 : }
366 1 : gridtools::GridSearch<ArrangePoints> mygridsearch( gmin, gmax, npoints, nfgrid, this );
367 : // Run multiple loops over all projections
368 3 : for(unsigned i=0; i<ncycles; ++i) {
369 202 : for(unsigned j=0; j<nvals; ++j) {
370 : // Setup target distances and target functions for calculate stress
371 200 : current_index=j;
372 :
373 : // Find current projection of jth point
374 600 : for(unsigned k=0; k<dimout; ++k) {
375 400 : mypoint[k]=mypos[j*dimout+k];
376 : }
377 : // Minimise using grid search
378 200 : bool moved=mygridsearch.minimise( mypoint, &ArrangePoints::calculateStress );
379 200 : if( moved ) {
380 : // Reassign the new projection
381 66 : for(unsigned k=0; k<dimout; ++k) {
382 44 : mypos[dimout*j+k]=mypoint[k];
383 : }
384 : // Minimise output using conjugate gradient
385 22 : mycgminimise.minimise( cgtol, mypos, &ArrangePoints::calculateFullStress );
386 : }
387 : }
388 402 : for(unsigned i=0; i<mypos.size(); ++i) {
389 400 : pos[i] = mypos[i];
390 : }
391 : }
392 2 : } else if( mintype==smacof ) {
393 1 : SMACOF mysmacof( getPntrToArgument( dimout + 2*dist_target) );
394 1 : double stress = recalculateSmacofWeights( pos, mysmacof );
395 :
396 1 : for(unsigned i=0; i<maxiter; ++i) {
397 : // Optimise using smacof and current weights
398 1 : mysmacof.optimize( smacof_tol, maxiter, pos );
399 : // Recalculate weights matrix and sigma
400 1 : double newsig = recalculateSmacofWeights( pos, mysmacof );
401 : // Test whether or not the algorithm has converged
402 1 : if( fabs( newsig - stress )<smacof_tol ) {
403 : break;
404 : }
405 : // Make initial sigma into new sigma so that the value of new sigma is used every time so that the error can be reduced
406 : stress=newsig;
407 : }
408 : }
409 7 : }
410 :
411 8 : void ArrangePoints::prepare() {
412 : // Make sure all the components are the right size
413 8 : std::vector<unsigned> shape(1,getPntrToArgument( dimout )->getShape()[0]);
414 24 : for(unsigned j=0; j<dimout; ++j) {
415 16 : if( getPntrToComponent(j)->getShape()[0]!=shape[0] ) {
416 8 : getPntrToComponent(j)->setShape( shape );
417 : }
418 : }
419 8 : }
420 :
421 7 : void ArrangePoints::calculate() {
422 : // Retrive the initial value
423 7 : unsigned nvals = getPntrToArgument( dimout )->getShape()[0];
424 7 : std::vector<double> pos( dimout*nvals );
425 1107 : for(unsigned i=0; i<nvals; ++i) {
426 3300 : for(unsigned j=0; j<dimout; ++j) {
427 2200 : pos[ dimout*i + j ] = getPntrToArgument(j)->get(i);
428 : }
429 : }
430 : // Do the optimization
431 7 : optimize( pos );
432 : // And set the final values
433 1107 : for(unsigned i=0; i<nvals; ++i) {
434 3300 : for(unsigned j=0; j<dimout; ++j) {
435 2200 : getPntrToComponent(j)->set( i, pos[dimout*i+j] );
436 : }
437 : }
438 7 : }
439 :
440 : }
441 : }
|