Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2014-2019 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionWithValue.h"
23 : #include "core/ActionAtomistic.h"
24 : #include "core/ActionWithArguments.h"
25 : #include "reference/MultiReferenceBase.h"
26 : #include "reference/MetricRegister.h"
27 : #include "core/ActionRegister.h"
28 : #include "core/PlumedMain.h"
29 : #include "reference/Direction.h"
30 : #include "tools/Pbc.h"
31 :
32 : //+PLUMEDOC COLVAR PCAVARS
33 : /*
34 : Projection on principal component eigenvectors or other high dimensional linear subspace
35 :
36 : The collective variables described in \ref dists allow one to calculate the distance between the
37 : instaneous structure adopted by the system and some high-dimensional, reference configuration. The
38 : problem with doing this is that, as one gets further and further from the reference configuration, the
39 : distance from it becomes a progressively poorer and poorer collective variable. This happens because
40 : the ``number" of structures at a distance \f$d\f$ from a reference configuration is proportional to \f$d^N\f$ in
41 : an \f$N\f$ dimensional space. Consequently, when \f$d\f$ is small the distance from the reference configuration
42 : may well be a good collective variable. However, when \f$d\f$ is large it is unlikely that the distance from the reference
43 : structure is a good CV. When the distance is large there will almost certainly be markedly different
44 : configuration that have the same CV value and hence barriers in transverse degrees of
45 : freedom.
46 :
47 : For these reasons dimensionality reduction is often employed so a projection \f$\mathbf{s}\f$ of a high-dimensional configuration
48 : \f$\mathbf{X}\f$ in a lower dimensionality space using a function:
49 :
50 : \f[
51 : \mathbf{s} = F(\mathbf{X}-\mathbf{X}^{ref})
52 : \f]
53 :
54 : where here we have introduced some high-dimensional reference configuration \f$\mathbf{X}^{ref}\f$. By far the simplest way to
55 : do this is to use some linear operator for \f$F\f$. That is to say we find a low-dimensional projection
56 : by rotating the basis vectors using some linear algebra:
57 :
58 : \f[
59 : \mathbf{s}_i = \sum_k A_{ik} ( X_{k} - X_{k}^{ref} )
60 : \f]
61 :
62 : Here \f$A\f$ is a \f$d\f$ by \f$D\f$ matrix where \f$D\f$ is the dimensionality of the high dimensional space and \f$d\f$ is
63 : the dimensionality of the lower dimensional subspace. In plumed when this kind of projection you can use the majority
64 : of the metrics detailed on \ref dists to calculate the displacement, \f$\mathbf{X}-\mathbf{X}^{ref}\f$, from the reference configuration.
65 : The matrix \f$A\f$ can be found by various means including principal component analysis and normal mode analysis. In both these methods the
66 : rows of \f$A\f$ would be the principle eigenvectors of a square matrix. For PCA the covariance while for normal modes the Hessian.
67 :
68 : \bug It is not possible to use the \ref DRMSD metric with this variable. You can get around this by listing the set of distances you wish to calculate for your DRMSD in the plumed file explicitally and using the EUCLIDEAN metric. MAHALONOBIS and NORM-EUCLIDEAN also do not work with this variable but using these options makes little sense when projecting on a linear subspace.
69 :
70 : \par Examples
71 :
72 : The following input calculates a projection on a linear subspace where the displacements
73 : from the reference configuration are calculated using the OPTIMAL metric. Consequently,
74 : both translation of the center of mass of the atoms and rotation of the reference
75 : frame are removed from these displacements. The matrix \f$A\f$ and the reference
76 : configuration \f$R^{ref}\f$ are specified in the pdb input file reference.pdb and the
77 : value of all projections (and the residual) are output to a file called colvar2.
78 :
79 : \plumedfile
80 : PCAVARS REFERENCE=reference.pdb TYPE=OPTIMAL LABEL=pca2
81 : PRINT ARG=pca2.* FILE=colvar2
82 : \endplumedfile
83 :
84 : The reference configurations can be specified using a pdb file. The first configuration that you provide is the reference configuration,
85 : which is refered to in the above as \f$X^{ref}\f$ subsequent configurations give the directions of row vectors that are contained in
86 : the matrix \f$A\f$ above. These directions can be specified by specifying a second configuration - in this case a vector will
87 : be constructed by calculating the displacement of this second configuration from the reference configuration. A pdb input prepared
88 : in this way would look as follows:
89 :
90 : \verbatim
91 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
92 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
93 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 1.00 1.00
94 : ATOM 13 HB2 ALA 2 21.112 -10.688 -12.476 1.00 1.00
95 : ATOM 15 C ALA 2 19.422 7.978 -14.536 1.00 1.00
96 : ATOM 20 HH31 NME 3 20.122 -9.928 -17.746 1.00 1.00
97 : ATOM 21 HH32 NME 3 18.572 -13.148 -16.346 1.00 1.00
98 : END
99 : ATOM 2 CH3 ACE 1 13.932 -14.718 -6.016 1.00 1.00
100 : ATOM 5 C ACE 1 20.312 -9.928 -5.946 1.00 1.00
101 : ATOM 9 CA ALA 2 18.462 -11.088 -8.986 1.00 1.00
102 : ATOM 13 HB2 ALA 2 20.112 -11.688 -12.476 1.00 1.00
103 : ATOM 15 C ALA 2 19.422 7.978 -12.536 1.00 1.00
104 : ATOM 20 HH31 NME 3 20.122 -9.928 -17.746 1.00 1.00
105 : ATOM 21 HH32 NME 3 18.572 -13.148 -16.346 1.00 1.00
106 : END
107 : \endverbatim
108 :
109 : Alternatively, the second configuration can specify the components of \f$A\f$ explicitally. In this case you need to include the
110 : keyword TYPE=DIRECTION in the remarks to the pdb as shown below.
111 :
112 : \verbatim
113 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
114 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
115 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 1.00 1.00
116 : ATOM 13 HB2 ALA 2 21.112 -10.688 -12.476 1.00 1.00
117 : ATOM 15 C ALA 2 19.422 7.978 -14.536 1.00 1.00
118 : ATOM 20 HH31 NME 3 20.122 -9.928 -17.746 1.00 1.00
119 : ATOM 21 HH32 NME 3 18.572 -13.148 -16.346 1.00 1.00
120 : END
121 : REMARK TYPE=DIRECTION
122 : ATOM 2 CH3 ACE 1 0.1414 0.3334 -0.0302 1.00 0.00
123 : ATOM 5 C ACE 1 0.0893 -0.1095 -0.1434 1.00 0.00
124 : ATOM 9 CA ALA 2 0.0207 -0.321 0.0321 1.00 0.00
125 : ATOM 13 HB2 ALA 2 0.0317 -0.6085 0.0783 1.00 0.00
126 : ATOM 15 C ALA 2 0.1282 -0.4792 0.0797 1.00 0.00
127 : ATOM 20 HH31 NME 3 0.0053 -0.465 0.0309 1.00 0.00
128 : ATOM 21 HH32 NME 3 -0.1019 -0.4261 -0.0082 1.00 0.00
129 : END
130 : \endverbatim
131 :
132 : If your metric involves arguments the labels of these arguments in your plumed input file should be specified in the REMARKS
133 : for each of the frames of your path. An input file in this case might look like this:
134 :
135 : \verbatim
136 : DESCRIPTION: a pca eigenvector specified using the start point and direction in the HD space.
137 : REMARK WEIGHT=1.0
138 : REMARK ARG=d1,d2
139 : REMARK d1=1.0 d2=1.0
140 : END
141 : REMARK TYPE=DIRECTION
142 : REMARK ARG=d1,d2
143 : REMARK d1=0.1 d2=0.25
144 : END
145 : \endverbatim
146 :
147 : Here we are working with the EUCLIDEAN metric and notice that we have specified the components of \f$A\f$ using DIRECTION.
148 : Consequently, the values of d1 and d2 in the second frame above do not specify a particular coordinate in the high-dimensional
149 : space as in they do in the first frame. Instead these values are the coefficients that can be used to construct a linear combination of d1 and d2.
150 : If we wanted to specify the direction in this metric using the start and end point of the vector we would write:
151 :
152 : \verbatim
153 : DESCRIPTION: a pca eigenvector specified using the start and end point of a vector in the HD space.
154 : REMARK WEIGHT=1.0
155 : REMARK ARG=d1,d2
156 : REMARK d1=1.0 d2=1.0
157 : END
158 : REMARK ARG=d1,d2
159 : REMARK d1=1.1 d2=1.25
160 : END
161 : \endverbatim
162 :
163 : */
164 : //+ENDPLUMEDOC
165 :
166 : namespace PLMD {
167 : namespace mapping {
168 :
169 : class PCAVars :
170 : public ActionWithValue,
171 : public ActionAtomistic,
172 : public ActionWithArguments
173 : {
174 : private:
175 : /// The holders for the derivatives
176 : MultiValue myvals;
177 : ReferenceValuePack mypack;
178 : /// The position of the reference configuration (the one we align to)
179 : ReferenceConfiguration* myref;
180 : /// The eigenvectors we are interested in
181 : std::vector<Direction> directions;
182 : /// Stuff for applying forces
183 : std::vector<double> forces, forcesToApply;
184 : public:
185 : static void registerKeywords( Keywords& keys );
186 : explicit PCAVars(const ActionOptions&);
187 : ~PCAVars();
188 : unsigned getNumberOfDerivatives();
189 : void lockRequests();
190 : void unlockRequests();
191 : void calculateNumericalDerivatives( ActionWithValue* a );
192 : void calculate();
193 : void apply();
194 : };
195 :
196 6456 : PLUMED_REGISTER_ACTION(PCAVars,"PCAVARS")
197 :
198 5 : void PCAVars::registerKeywords( Keywords& keys ) {
199 5 : Action::registerKeywords( keys );
200 5 : ActionWithValue::registerKeywords( keys );
201 5 : ActionAtomistic::registerKeywords( keys );
202 5 : ActionWithArguments::registerKeywords( keys );
203 5 : componentsAreNotOptional(keys);
204 20 : keys.addOutputComponent("eig","default","the projections on each eigenvalue are stored on values labeled eig-1, eig-2, ...");
205 20 : keys.addOutputComponent("residual","default","the distance of the configuration from the linear subspace defined "
206 : "by the vectors, \\f$e_i\\f$, that are contained in the rows of \\f$A\\f$. In other words this is "
207 : "\\f$\\sqrt( r^2 - \\sum_i [\\mathbf{r}.\\mathbf{e_i}]^2)\\f$ where "
208 : "\\f$r\\f$ is the distance between the instantaneous position and the "
209 : "reference point.");
210 20 : keys.add("compulsory","REFERENCE","a pdb file containing the reference configuration and configurations that define the directions for each eigenvector");
211 25 : keys.add("compulsory","TYPE","OPTIMAL","The method we are using for alignment to the reference structure");
212 5 : }
213 :
214 4 : PCAVars::PCAVars(const ActionOptions& ao):
215 : Action(ao),
216 : ActionWithValue(ao),
217 : ActionAtomistic(ao),
218 : ActionWithArguments(ao),
219 : myvals(1,0),
220 12 : mypack(0,0,myvals)
221 : {
222 :
223 : // What type of distance are we calculating
224 8 : std::string mtype; parse("TYPE",mtype);
225 :
226 : // Open reference file
227 8 : std::string reference; parse("REFERENCE",reference);
228 4 : FILE* fp=fopen(reference.c_str(),"r");
229 4 : if(!fp) error("could not open reference file " + reference );
230 :
231 : // Read all reference configurations
232 12 : MultiReferenceBase myframes( "", false );
233 : bool do_read=true; unsigned nfram=0;
234 16 : while (do_read) {
235 28 : PDB mypdb;
236 : // Read the pdb file
237 32 : do_read=mypdb.readFromFilepointer(fp,plumed.getAtoms().usingNaturalUnits(),0.1/atoms.getUnits().getLength());
238 : // Fix argument names
239 16 : expandArgKeywordInPDB( mypdb );
240 16 : if(do_read) {
241 12 : if( nfram==0 ) {
242 4 : myref = metricRegister().create<ReferenceConfiguration>( mtype, mypdb );
243 4 : Direction* tdir = dynamic_cast<Direction*>( myref );
244 4 : if( tdir ) error("first frame should be reference configuration - not direction of vector");
245 4 : if( !myref->pcaIsEnabledForThisReference() ) error("can't do PCA with reference type " + mtype );
246 8 : std::vector<std::string> remarks( mypdb.getRemark() ); std::string rtype;
247 8 : bool found=Tools::parse( remarks, "TYPE", rtype );
248 12 : if(!found) { std::vector<std::string> newrem(1); newrem[0]="TYPE="+mtype; mypdb.addRemark(newrem); }
249 4 : myframes.readFrame( mypdb );
250 8 : } else myframes.readFrame( mypdb );
251 12 : nfram++;
252 : } else {
253 : break;
254 : }
255 : }
256 4 : fclose(fp);
257 :
258 4 : if( nfram<=1 ) error("no eigenvectors were specified");
259 8 : log.printf(" found %u eigenvectors in file %s \n",nfram-1,reference.c_str() );
260 :
261 : // Finish the setup of the mapping object
262 : // Get the arguments and atoms that are required
263 4 : std::vector<AtomNumber> atoms; std::vector<std::string> args;
264 4 : myframes.getAtomAndArgumentRequirements( atoms, args );
265 4 : requestAtoms( atoms ); std::vector<Value*> req_args;
266 4 : interpretArgumentList( args, req_args ); requestArguments( req_args );
267 :
268 : // Setup the derivative pack
269 8 : if( atoms.size()>0 ) myvals.resize( 1, args.size() + 3*atoms.size() + 9 );
270 0 : else myvals.resize( 1, args.size() );
271 8 : mypack.resize( args.size(), atoms.size() );
272 92 : for(unsigned i=0; i<atoms.size(); ++i) mypack.setAtomIndex( i, i );
273 : /// This sets up all the storage data required by PCA in the pack
274 4 : myframes.getFrame(0)->setupPCAStorage( mypack );
275 :
276 : // Retrieve the position of the first frame, as we use this for alignment
277 4 : myref->setNamesAndAtomNumbers( atoms, args );
278 : // Check there are no periodic arguments
279 4 : for(unsigned i=0; i<getNumberOfArguments(); ++i) {
280 0 : if( getPntrToArgument(i)->isPeriodic() ) error("cannot use periodic variables in pca projections");
281 : }
282 : // Work out if the user wants to normalise the input vector
283 4 : checkRead();
284 :
285 : // Resize the matrices that will hold our eivenvectors
286 20 : for(unsigned i=1; i<nfram; ++i) {
287 32 : directions.push_back( Direction(ReferenceConfigurationOptions("DIRECTION")));
288 16 : directions[i-1].setNamesAndAtomNumbers( atoms, args );
289 : }
290 :
291 : // Create fake periodic boundary condition (these would only be used for DRMSD which is not allowed)
292 : // Now calculate the eigenvectors
293 20 : for(unsigned i=1; i<nfram; ++i) {
294 24 : myframes.getFrame(i)->extractDisplacementVector( myref->getReferencePositions(), getArguments(), myref->getReferenceArguments(), true, directions[i-1] );
295 : // Create a component to store the output
296 8 : std::string num; Tools::convert( i, num );
297 24 : addComponentWithDerivatives("eig-"+num); componentIsNotPeriodic("eig-"+num);
298 : }
299 12 : addComponentWithDerivatives("residual"); componentIsNotPeriodic("residual");
300 :
301 : // Get appropriate number of derivatives
302 : unsigned nder;
303 4 : if( getNumberOfAtoms()>0 ) {
304 8 : nder = 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
305 : } else {
306 : nder = getNumberOfArguments();
307 : }
308 :
309 : // Resize all derivative arrays
310 4 : forces.resize( nder ); forcesToApply.resize( nder );
311 28 : for(unsigned i=0; i<getNumberOfComponents(); ++i) getPntrToComponent(i)->resizeDerivatives(nder);
312 4 : }
313 :
314 16 : PCAVars::~PCAVars() {
315 4 : delete myref;
316 8 : }
317 :
318 36 : unsigned PCAVars::getNumberOfDerivatives() {
319 36 : if( getNumberOfAtoms()>0 ) {
320 36 : return 3*getNumberOfAtoms() + 9 + getNumberOfArguments();
321 : }
322 0 : return getNumberOfArguments();
323 : }
324 :
325 44 : void PCAVars::lockRequests() {
326 : ActionWithArguments::lockRequests();
327 : ActionAtomistic::lockRequests();
328 44 : }
329 :
330 44 : void PCAVars::unlockRequests() {
331 : ActionWithArguments::unlockRequests();
332 : ActionAtomistic::unlockRequests();
333 44 : }
334 :
335 44 : void PCAVars::calculate() {
336 : // Clear the reference value pack
337 44 : mypack.clear();
338 : // Calculate distance between instaneous configuration and reference
339 88 : double dist = myref->calculate( getPositions(), getPbc(), getArguments(), mypack, true );
340 :
341 : // Start accumulating residual by adding derivatives of distance
342 88 : Value* resid=getPntrToComponent( getNumberOfComponents()-1 ); unsigned nargs=getNumberOfArguments();
343 44 : for(unsigned j=0; j<getNumberOfArguments(); ++j) resid->addDerivative( j, mypack.getArgumentDerivative(j) );
344 704 : for(unsigned j=0; j<getNumberOfAtoms(); ++j) {
345 308 : Vector ader=mypack.getAtomDerivative( j );
346 2156 : for(unsigned k=0; k<3; ++k) resid->addDerivative( nargs +3*j+k, ader[k] );
347 : }
348 : // Retrieve the values of all arguments
349 44 : std::vector<double> args( getNumberOfArguments() ); for(unsigned i=0; i<getNumberOfArguments(); ++i) args[i]=getArgument(i);
350 :
351 : // Now calculate projections on pca vectors
352 44 : Vector adif, ader; Tensor fvir, tvir;
353 220 : for(unsigned i=0; i<getNumberOfComponents()-1; ++i) { // One less component as we also have residual
354 176 : double proj=myref->projectDisplacementOnVector( directions[i], getArguments(), args, mypack );
355 : // And now accumulate derivatives
356 88 : Value* eid=getPntrToComponent(i);
357 88 : for(unsigned j=0; j<getNumberOfArguments(); ++j) eid->addDerivative( j, mypack.getArgumentDerivative(j) );
358 88 : if( getNumberOfAtoms()>0 ) {
359 88 : tvir.zero();
360 1408 : for(unsigned j=0; j<getNumberOfAtoms(); ++j) {
361 616 : Vector myader=mypack.getAtomDerivative(j);
362 4312 : for(unsigned k=0; k<3; ++k) {
363 1848 : eid->addDerivative( nargs + 3*j+k, myader[k] );
364 1848 : resid->addDerivative( nargs + 3*j+k, -2*proj*myader[k] );
365 : }
366 1232 : tvir += -1.0*Tensor( getPosition(j), myader );
367 : }
368 616 : for(unsigned j=0; j<3; ++j) {
369 2640 : for(unsigned k=0; k<3; ++k) eid->addDerivative( nargs + 3*getNumberOfAtoms() + 3*j + k, tvir(j,k) );
370 : }
371 : }
372 88 : dist -= proj*proj; // Subtract square from total squared distance to get residual squared
373 : // Derivatives of residual
374 88 : for(unsigned j=0; j<getNumberOfArguments(); ++j) resid->addDerivative( j, -2*proj*eid->getDerivative(j) );
375 : // for(unsigned j=0;j<getNumberOfArguments();++j) resid->addDerivative( j, -2*proj*arg_eigv(i,j) );
376 : // And set final value
377 88 : getPntrToComponent(i)->set( proj );
378 : }
379 44 : dist=sqrt(dist);
380 : resid->set( dist );
381 :
382 : // Take square root of residual derivatives
383 44 : double prefactor = 0.5 / dist;
384 44 : for(unsigned j=0; j<getNumberOfArguments(); ++j) resid->setDerivative( j, prefactor*resid->getDerivative(j) );
385 660 : for(unsigned j=0; j<getNumberOfAtoms(); ++j) {
386 3080 : for(unsigned k=0; k<3; ++k) resid->setDerivative( nargs + 3*j+k, prefactor*resid->getDerivative( nargs+3*j+k ) );
387 : }
388 :
389 : // And finally virial for residual
390 44 : if( getNumberOfAtoms()>0 ) {
391 44 : tvir.zero();
392 660 : for(unsigned j=0; j<getNumberOfAtoms(); ++j) {
393 1232 : Vector ader; for(unsigned k=0; k<3; ++k) ader[k]=resid->getDerivative( nargs + 3*j+k );
394 616 : tvir += -1.0*Tensor( getPosition(j), ader );
395 : }
396 308 : for(unsigned j=0; j<3; ++j) {
397 1320 : for(unsigned k=0; k<3; ++k) resid->addDerivative( nargs + 3*getNumberOfAtoms() + 3*j + k, tvir(j,k) );
398 : }
399 : }
400 :
401 44 : }
402 :
403 0 : void PCAVars::calculateNumericalDerivatives( ActionWithValue* a ) {
404 0 : if( getNumberOfArguments()>0 ) {
405 0 : ActionWithArguments::calculateNumericalDerivatives( a );
406 : }
407 0 : if( getNumberOfAtoms()>0 ) {
408 0 : Matrix<double> save_derivatives( getNumberOfComponents(), getNumberOfArguments() );
409 0 : for(unsigned j=0; j<getNumberOfComponents(); ++j) {
410 0 : for(unsigned i=0; i<getNumberOfArguments(); ++i) save_derivatives(j,i)=getPntrToComponent(j)->getDerivative(i);
411 : }
412 0 : calculateAtomicNumericalDerivatives( a, getNumberOfArguments() );
413 0 : for(unsigned j=0; j<getNumberOfComponents(); ++j) {
414 0 : for(unsigned i=0; i<getNumberOfArguments(); ++i) getPntrToComponent(j)->addDerivative( i, save_derivatives(j,i) );
415 : }
416 : }
417 0 : }
418 :
419 44 : void PCAVars::apply() {
420 :
421 88 : bool wasforced=false; forcesToApply.assign(forcesToApply.size(),0.0);
422 308 : for(unsigned i=0; i<getNumberOfComponents(); ++i) {
423 132 : if( getPntrToComponent(i)->applyForce( forces ) ) {
424 : wasforced=true;
425 0 : for(unsigned i=0; i<forces.size(); ++i) forcesToApply[i]+=forces[i];
426 : }
427 : }
428 44 : if( wasforced ) {
429 0 : addForcesOnArguments( forcesToApply );
430 0 : if( getNumberOfAtoms()>0 ) setForcesOnAtoms( forcesToApply, getNumberOfArguments() );
431 : }
432 :
433 44 : }
434 :
435 : }
436 4839 : }
|