Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2013-2018 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionShortcut.h"
23 : #include "core/ActionRegister.h"
24 : #include "core/PlumedMain.h"
25 : #include "core/ActionWithArguments.h"
26 : #include "tools/PDB.h"
27 : #include "Path.h"
28 :
29 : namespace PLMD {
30 : namespace mapping {
31 :
32 : //+PLUMEDOC COLVAR PCAVARS
33 : /*
34 : Projection on principal component eigenvectors or other high dimensional linear subspace
35 :
36 : The collective variables described in \ref dists allow one to calculate the distance between the
37 : instantaneous structure adopted by the system and some high-dimensional, reference configuration. The
38 : problem with doing this is that, as one gets further and further from the reference configuration, the
39 : distance from it becomes a progressively poorer and poorer collective variable. This happens because
40 : the ``number" of structures at a distance \f$d\f$ from a reference configuration is proportional to \f$d^N\f$ in
41 : an \f$N\f$ dimensional space. Consequently, when \f$d\f$ is small the distance from the reference configuration
42 : may well be a good collective variable. However, when \f$d\f$ is large it is unlikely that the distance from the reference
43 : structure is a good CV. When the distance is large there will almost certainly be markedly different
44 : configuration that have the same CV value and hence barriers in transverse degrees of
45 : freedom.
46 :
47 : For these reasons dimensionality reduction is often employed so a projection \f$\mathbf{s}\f$ of a high-dimensional configuration
48 : \f$\mathbf{X}\f$ in a lower dimensionality space using a function:
49 :
50 : \f[
51 : \mathbf{s} = F(\mathbf{X}-\mathbf{X}^{ref})
52 : \f]
53 :
54 : where here we have introduced some high-dimensional reference configuration \f$\mathbf{X}^{ref}\f$. By far the simplest way to
55 : do this is to use some linear operator for \f$F\f$. That is to say we find a low-dimensional projection
56 : by rotating the basis vectors using some linear algebra:
57 :
58 : \f[
59 : \mathbf{s}_i = \sum_k A_{ik} ( X_{k} - X_{k}^{ref} )
60 : \f]
61 :
62 : Here \f$A\f$ is a \f$d\f$ by \f$D\f$ matrix where \f$D\f$ is the dimensionality of the high dimensional space and \f$d\f$ is
63 : the dimensionality of the lower dimensional subspace. In plumed when this kind of projection you can use the majority
64 : of the metrics detailed on \ref dists to calculate the displacement, \f$\mathbf{X}-\mathbf{X}^{ref}\f$, from the reference configuration.
65 : The matrix \f$A\f$ can be found by various means including principal component analysis and normal mode analysis. In both these methods the
66 : rows of \f$A\f$ would be the principle eigenvectors of a square matrix. For PCA the covariance while for normal modes the Hessian.
67 :
68 : \bug It is not possible to use the \ref DRMSD metric with this variable. You can get around this by listing the set of distances you wish to calculate for your DRMSD in the plumed file explicitly and using the EUCLIDEAN metric. MAHALONOBIS and NORM-EUCLIDEAN also do not work with this variable but using these options makes little sense when projecting on a linear subspace.
69 :
70 : \par Examples
71 :
72 : The following input calculates a projection on a linear subspace where the displacements
73 : from the reference configuration are calculated using the OPTIMAL metric. Consequently,
74 : both translation of the center of mass of the atoms and rotation of the reference
75 : frame are removed from these displacements. The matrix \f$A\f$ and the reference
76 : configuration \f$R^{ref}\f$ are specified in the pdb input file reference.pdb and the
77 : value of all projections (and the residual) are output to a file called colvar2.
78 :
79 : \plumedfile
80 : PCAVARS REFERENCE=reference.pdb TYPE=OPTIMAL LABEL=pca2
81 : PRINT ARG=pca2.* FILE=colvar2
82 : \endplumedfile
83 :
84 : The reference configurations can be specified using a pdb file. The first configuration that you provide is the reference configuration,
85 : which is referred to in the above as \f$X^{ref}\f$ subsequent configurations give the directions of row vectors that are contained in
86 : the matrix \f$A\f$ above. These directions are specified by giving a second configuration that describes the components of \f$A\f$ explicitly.
87 :
88 : \verbatim
89 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
90 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
91 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 1.00 1.00
92 : ATOM 13 HB2 ALA 2 21.112 -10.688 -12.476 1.00 1.00
93 : ATOM 15 C ALA 2 19.422 7.978 -14.536 1.00 1.00
94 : ATOM 20 HH31 NME 3 20.122 -9.928 -17.746 1.00 1.00
95 : ATOM 21 HH32 NME 3 18.572 -13.148 -16.346 1.00 1.00
96 : END
97 : REMARK
98 : ATOM 2 CH3 ACE 1 0.1414 0.3334 -0.0302 1.00 0.00
99 : ATOM 5 C ACE 1 0.0893 -0.1095 -0.1434 1.00 0.00
100 : ATOM 9 CA ALA 2 0.0207 -0.321 0.0321 1.00 0.00
101 : ATOM 13 HB2 ALA 2 0.0317 -0.6085 0.0783 1.00 0.00
102 : ATOM 15 C ALA 2 0.1282 -0.4792 0.0797 1.00 0.00
103 : ATOM 20 HH31 NME 3 0.0053 -0.465 0.0309 1.00 0.00
104 : ATOM 21 HH32 NME 3 -0.1019 -0.4261 -0.0082 1.00 0.00
105 : END
106 : \endverbatim
107 :
108 : Notice that the PCAVARS command is a shortcut. If you look at how the shortcut in the above input is expanded you should be able to see how the command works
109 : by calculating the RMSD displacement between the instantaneous and reference configuration and how those displacements are then projected on the eigenvector that
110 : was specified in the second frame of the pdb input above. Understanding the expanded version of this shortcut command allows you to recognise that you can project
111 : the displacement vector on any arbitrary vector. For example in the input below two reference structures are provided in the pdb file. PLUMED calculates the RMSD
112 : distance between these two reference configurations during setup and sets up a unit vector called eig that points along the director connecting the two RMSD structure.
113 : During the calculation the vector connecting the instantaneous configuration and the first of the two reference configurations in computed. This vector is then projected
114 : on the unit vector connecting the two initial structures:
115 :
116 : \plumedfile
117 : # Read in two reference configuratioms from PDB file
118 : ref1: PDB2CONSTANT REFERENCE=two-frames.pdb NUMBER=1
119 : ref1T: TRANSPOSE ARG=ref1
120 : ref2: PDB2CONSTANT REFERENCE=two-frames.pdb NUMBER=2
121 : # Calculate the displacement vector that takes you from ref1 to ref2
122 : eigu: RMSD_VECTOR ARG=ref1T,ref2 DISPLACEMENT TYPE=OPTIMAL
123 : # Normalise the reference vector
124 : eigu2: CUSTOM ARG=eigu.disp FUNC=x*x PERIODIC=NO
125 : eign2: SUM ARG=eigu2 PERIODIC=NO
126 : eig: CUSTOM ARG=eigu.disp,eign2 FUNC=x/sqrt(y) PERIODIC=NO
127 : eigT: TRANSPOSE ARG=eig
128 : # Everything prior to this point is only done in setup. From here onwards we have the commands that are done during the main calculate loop.
129 :
130 : # Calculate the RMSD displacement between the instaneous structure and the first reference structure
131 : rmsd: RMSD REFERENCE=two-frames.pdb NUMBER=1 TYPE=OPTIMAL DISPLACEMENT SQUARED
132 : # Project the displacement computed above on the director of the vector that connects reference structure ref1 to refeference structure ref2
133 : pca: MATRIX_VECTOR_PRODUCT ARG=eigT,rmsd.disp
134 :
135 : # Print the final CV to a file
136 : PRINT ARG=pca FILE=colvar
137 : \endplumedfile
138 :
139 : You also project vectors of differences of arguments on reference vectors. For example, the input below can be used to look at the projection
140 : of the vector connecting the instantanous configuraiton to a reference point in CV on a reference vector that has been specified in the PDB file.
141 :
142 : \plumedfile
143 : d1: DISTANCE ATOMS=1,2
144 : d2: DISTANCE ATOMS=3,4
145 : d3: DISTANCE ATOMS=5,6
146 : pca: PCAVARS ARG=d1,d2,d3 REFERENCE=epath.pdb
147 : PRINT ARG=pca_eig-1,pca_residual FILE=colvar
148 : \endplumedfile
149 :
150 : The pdb input file for this calculation might look something like this:
151 :
152 : \verbatim
153 : REMARK d1=0.1221 d2=0.0979 d3=0.1079
154 : END
155 : REMARK d1=0.078811 d2=-0.945732 d3=-0.315244
156 : END
157 : \endverbatim
158 :
159 : The first set of argument values in this input file are the reference values for the arguments. The second any subsquent sets of arguments give the
160 : coefficients that should be used when constructing linear combinations.
161 :
162 : Notice, lastly, that you can also use a combination of argument values and atomic positions when specifying the reference configuration and the reference
163 : directions. If you are doing something this complicated, however, you are perhaps better working with the PLUMED input directly rather than this shortcut
164 : as you will need to take special measures to ensure that all your CVs are in the same units.
165 :
166 : */
167 : //+ENDPLUMEDOC
168 :
169 : class PCAVars : public ActionShortcut {
170 : public:
171 : static void registerKeywords(Keywords& keys);
172 : explicit PCAVars(const ActionOptions&);
173 : };
174 :
175 :
176 : PLUMED_REGISTER_ACTION(PCAVars,"PCAVARS")
177 :
178 9 : void PCAVars::registerKeywords( Keywords& keys ) {
179 9 : ActionShortcut::registerKeywords( keys );
180 18 : keys.add("compulsory","REFERENCE","a pdb file containing the set of reference configurations");
181 18 : keys.add("compulsory","TYPE","OPTIMAL-FAST","the manner in which distances are calculated. More information on the different "
182 : "metrics that are available in PLUMED can be found in the section of the manual on "
183 : "\\ref dists");
184 18 : keys.addInputKeyword("optional","ARG","scalar/vector","if there are arguments to be used specify them here");
185 18 : keys.addFlag("NOPBC",false,"do not use periodic boundary conditions when computing this quantity");
186 18 : keys.addOutputComponent("eig","default","vector","the projections on the eigenvalues");
187 18 : keys.addOutputComponent("residual","default","scalar","the residual distance that is not projected on any of the eigenvalues");
188 27 : keys.needsAction("RMSD"); keys.needsAction("PDB2CONSTANT"); keys.needsAction("TRANSPOSE");
189 36 : keys.needsAction("EUCLIDEAN_DISTANCE"); keys.needsAction("CONCATENATE"); keys.needsAction("COMBINE"); keys.needsAction("CONSTANT");
190 36 : keys.needsAction("COMBINE"); keys.needsAction("MATRIX_VECTOR_PRODUCT"); keys.needsAction("CUSTOM"); keys.needsAction("SUM");
191 9 : keys.needsAction("SELECT_COMPONENTS");
192 9 : }
193 :
194 5 : PCAVars::PCAVars( const ActionOptions& ao ):
195 : Action(ao),
196 5 : ActionShortcut(ao)
197 : {
198 10 : std::string reference; parse("REFERENCE",reference);
199 : // Create the object that holds the atomic positions by reading the first frame
200 5 : FILE* fp=std::fopen(reference.c_str(),"r"); PDB pdb; if(!fp) error("could not open reference file " + reference );
201 5 : bool do_read=pdb.readFromFilepointer(fp,false,0.1); if( !do_read ) plumed_merror("missing file " + reference );
202 5 : std::string mtype; parse("TYPE",mtype);
203 :
204 5 : if( pdb.getPositions().size()>0 ) {
205 : // And now create the rmsd object
206 3 : std::string rmsd_line = getShortcutLabel() + "_at: RMSD DISPLACEMENT SQUARED NUMBER=1 REFERENCE=" + reference;
207 6 : bool nopbc; parseFlag("NOPBC",nopbc); if(nopbc) rmsd_line += " NOPBC";
208 : // Now create the RMSD object
209 6 : readInputLine( rmsd_line + " TYPE=" + mtype );
210 : }
211 10 : std::vector<std::string> argnames; parseVector("ARG",argnames); unsigned nargs=0; std::string instargs, refargs; std::vector<Value*> theargs;
212 5 : if( argnames.size()>0 ) ActionWithArguments::interpretArgumentList( argnames, plumed.getActionSet(), this, theargs );
213 9 : for(unsigned i=0; i<theargs.size(); ++i) {
214 4 : std::string iargn = Path::fixArgumentName( theargs[i]->getName() ); nargs += theargs[i]->getNumberOfValues();
215 4 : if( theargs[i]->getNumberOfValues()>1 ) {
216 2 : readInputLine( getShortcutLabel() + "_ref_" + iargn + "T: PDB2CONSTANT NUMBER=1 REFERENCE=" + reference + " ARG=" + theargs[i]->getName() );
217 2 : readInputLine( getShortcutLabel() + "_ref_" + iargn + ": TRANSPOSE ARG=" + getShortcutLabel() + "_ref_" + iargn + "T");
218 6 : } else readInputLine( getShortcutLabel() + "_ref_" + iargn + ": PDB2CONSTANT NUMBER=1 REFERENCE=" + reference + " ARG=" + theargs[i]->getName() );
219 8 : if( i==0 ) { instargs=" ARG1=" + theargs[i]->getName(); refargs=" ARG2=" + getShortcutLabel() + "_ref_" + iargn; }
220 6 : else { instargs +="," + theargs[i]->getName(); refargs +="," + getShortcutLabel() + "_ref_" + iargn; }
221 : }
222 7 : if( theargs.size()>0 ) readInputLine( getShortcutLabel() + "_argdist: EUCLIDEAN_DISTANCE SQUARED" + instargs + refargs );
223 5 : if( pdb.getPositions().size()>0 && theargs.size()>0 ) {
224 0 : readInputLine( getShortcutLabel() + ": CONCATENATE ARG=" + getShortcutLabel() + "_at.disp," + getShortcutLabel() + "_argdist_diffT");
225 0 : readInputLine( getShortcutLabel() + "_dist: COMBINE ARG=" + getShortcutLabel() + "_at.dist," + getShortcutLabel() + "_argdist PERIODIC=NO");
226 : }
227 :
228 : // Get the displace stuff
229 5 : std::vector<double> displace( pdb.getBeta() ); double dtot = 0;
230 26 : for(unsigned i=0; i<displace.size(); ++i) dtot += displace[i];
231 26 : for(unsigned i=0; i<displace.size(); ++i) displace[i] = displace[i] / dtot;
232 :
233 : // Now read in the directions and create matheval objects to compute the pca components
234 5 : unsigned nfram=0, ncomp=0; std::string pvec;
235 13 : while( do_read ) {
236 13 : std::vector<double> argdir(nargs); PDB mypdb; do_read=mypdb.readFromFilepointer(fp,plumed.usingNaturalUnits(),0.1/plumed.getUnits().getLength());
237 13 : if( do_read ) {
238 8 : nfram++;
239 : // Normalize the eigenvector in the input
240 : double norm=0;
241 50 : for(unsigned i=0; i<mypdb.getPositions().size(); ++i) {
242 42 : norm += mypdb.getPositions()[i][0]*mypdb.getPositions()[i][0];
243 42 : norm += mypdb.getPositions()[i][1]*mypdb.getPositions()[i][1];
244 42 : norm += mypdb.getPositions()[i][2]*mypdb.getPositions()[i][2];
245 : }
246 : unsigned k=0;
247 12 : for(unsigned i=0; i<theargs.size(); ++i) {
248 4 : std::vector<double> argval( theargs[i]->getNumberOfValues() );
249 4 : if( !mypdb.getArgumentValue(theargs[i]->getName(), argval) ) error("argument " + theargs[i]->getName() + " was not set in pdb input");
250 10 : for(unsigned j=0; j<argval.size(); ++j) { argdir[k] = argval[j]; norm += argdir[k]*argdir[k]; k++; }
251 : }
252 8 : norm = sqrt( norm ); std::vector<double> normed_coeffs( 3*mypdb.getPositions().size() );
253 50 : for(unsigned i=0; i<mypdb.getPositions().size(); ++i) {
254 42 : if( mtype=="SIMPLE" ) {
255 28 : normed_coeffs[0*mypdb.getPositions().size()+i] = mypdb.getPositions()[i][0] / norm;
256 28 : normed_coeffs[1*mypdb.getPositions().size()+i] = mypdb.getPositions()[i][1] / norm;
257 28 : normed_coeffs[2*mypdb.getPositions().size()+i] = mypdb.getPositions()[i][2] / norm;
258 : } else {
259 14 : normed_coeffs[0*mypdb.getPositions().size()+i] = sqrt(displace[i])*mypdb.getPositions()[i][0] / norm;
260 14 : normed_coeffs[1*mypdb.getPositions().size()+i] = sqrt(displace[i])*mypdb.getPositions()[i][1] / norm;
261 14 : normed_coeffs[2*mypdb.getPositions().size()+i] = sqrt(displace[i])*mypdb.getPositions()[i][2] / norm;
262 : }
263 : }
264 : std::string coeff1;
265 8 : if( mypdb.getPositions().size()>0 ) {
266 6 : if( nfram==1 ) Tools::convert( normed_coeffs[0], pvec );
267 6 : else { Tools::convert( normed_coeffs[0], coeff1 ); pvec += "," + coeff1; }
268 126 : for(unsigned i=1; i<normed_coeffs.size(); ++i) {
269 120 : Tools::convert( normed_coeffs[i], coeff1 );
270 240 : pvec += "," + coeff1;
271 : }
272 6 : for(unsigned i=0; i<argdir.size(); ++i) { Tools::convert( argdir[i] / norm, coeff1 ); pvec += "," + coeff1; }
273 2 : } else if( theargs.size()>0 ) {
274 2 : if( nfram==1 ) Tools::convert( argdir[0] / norm, pvec );
275 0 : else { Tools::convert( argdir[0] / norm, coeff1 ); pvec += "," + coeff1; }
276 6 : for(unsigned i=1; i<argdir.size(); ++i) { Tools::convert( argdir[i] / norm, coeff1 ); pvec += "," + coeff1; }
277 : }
278 8 : ncomp = 3*mypdb.getPositions().size() + nargs;
279 : } else { break; }
280 13 : }
281 5 : std::fclose(fp); std::string neig, ncols; Tools::convert( nfram, neig ); Tools::convert( ncomp, ncols );
282 10 : readInputLine( getShortcutLabel() + "_peig: CONSTANT VALUES=" + pvec + " NROWS=" + neig + " NCOLS=" + ncols );
283 5 : if( pdb.getPositions().size()>0 && theargs.size()>0 ) readInputLine( getShortcutLabel() + "_eig: MATRIX_VECTOR_PRODUCT ARG=" + getShortcutLabel() + "_peig," + getShortcutLabel() );
284 8 : else if( pdb.getPositions().size()>0 ) readInputLine( getShortcutLabel() + "_eig: MATRIX_VECTOR_PRODUCT ARG=" + getShortcutLabel() + "_peig," + getShortcutLabel() + "_at.disp");
285 4 : else if( theargs.size()>0 ) readInputLine( getShortcutLabel() + "_eig: MATRIX_VECTOR_PRODUCT ARG=" + getShortcutLabel() + "_peig," + getShortcutLabel() + "_argdist_diffT");
286 13 : for(unsigned i=0; i<nfram; ++i) {
287 8 : std::string num; Tools::convert( i+1, num );
288 16 : readInputLine( getShortcutLabel() + "_eig-" + num + ": SELECT_COMPONENTS ARG=" + getShortcutLabel() + "_eig COMPONENTS=" + num );
289 : }
290 10 : readInputLine( getShortcutLabel() + "_eig2: CUSTOM ARG=" + getShortcutLabel() + "_eig FUNC=x*x PERIODIC=NO");
291 10 : readInputLine( getShortcutLabel() + "_eigsum2: SUM ARG=" + getShortcutLabel() + "_eig2 PERIODIC=NO");
292 5 : if( pdb.getPositions().size()>0 && theargs.size()>0 ) readInputLine( getShortcutLabel() + "_residual: CUSTOM ARG=" + getShortcutLabel() + "_dist," + getShortcutLabel() + "_eigsum2 FUNC=sqrt(x-y) PERIODIC=NO");
293 8 : else if( pdb.getPositions().size()>0 ) readInputLine( getShortcutLabel() + "_residual: CUSTOM ARG=" + getShortcutLabel() + "_at.dist," + getShortcutLabel() + "_eigsum2 FUNC=sqrt(x-y) PERIODIC=NO");
294 4 : else if( theargs.size()>0 ) readInputLine( getShortcutLabel() + "_residual: CUSTOM ARG=" + getShortcutLabel() + "_argdist," + getShortcutLabel() + "_eigsum2 FUNC=sqrt(x-y) PERIODIC=NO");
295 15 : }
296 :
297 : }
298 : }
299 :
300 :
|