Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2011-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "Colvar.h"
23 : #include "core/PlumedMain.h"
24 : #include "core/ActionRegister.h"
25 : #include "tools/RMSD.h"
26 : #include "tools/PDB.h"
27 :
28 : namespace PLMD {
29 : namespace colvar {
30 :
31 : class RMSD : public Colvar {
32 :
33 : bool squared;
34 : bool nopbc;
35 : PLMD::RMSD myrmsd;
36 : std::vector<Vector> der;
37 : public:
38 : explicit RMSD(const ActionOptions&);
39 : void calculate() override;
40 : static void registerKeywords(Keywords& keys);
41 : };
42 :
43 : //+PLUMEDOC DCOLVAR RMSD_SCALAR
44 : /*
45 : Calculate the RMSD with respect to a reference structure.
46 :
47 : \par Examples
48 :
49 : */
50 : //+ENDPLUMEDOC
51 :
52 : //+PLUMEDOC DCOLVAR RMSD
53 : /*
54 : Calculate the RMSD with respect to a reference structure.
55 :
56 : The aim of this colvar is to calculate something like:
57 :
58 : $$
59 : d(X,X') = \vert X-X' \vert
60 : $$
61 :
62 : where $X$ is the instantaneous position of all the atoms in the system and
63 : $X'$ is the positions of the atoms in some reference structure that is provided as input.
64 : $d(X,X')$ thus measures the distance all the atoms have moved away from this reference configuration.
65 : Oftentimes, it is only the internal motions of the structure - i.e. not the translations of the center of
66 : mass or the rotations of the reference frame - that are interesting. Hence, when calculating
67 : the root-mean-square deviation between the atoms in two configurations
68 : you must first superimpose the two structures in some way. At present PLUMED provides two distinct ways
69 : of performing this superposition. The first method is applied when you use TYPE=SIMPLE in the input
70 : line. This instruction tells PLUMED that the root mean square deviation is to be calculated after the
71 : positions of the geometric centers in the reference and instantaneous configurations are aligned. In
72 : other words $d(X,x')$ is to be calculated using:
73 :
74 : $$
75 : d(X,X') = \sqrt{ \sum_i \sum_\alpha^{x,y,z} \frac{w_i}{\sum_j w_j} ( X _{i,\alpha}-com _\alpha(X)-{X'} _{i,\alpha}+com _\alpha(X') )^2 }
76 : $$
77 :
78 : with
79 :
80 : $$
81 : com_\alpha (X) = \sum_i \frac{w'_{i}}{\sum_j w'_j} X _{i,\alpha}
82 : $$
83 :
84 : and
85 :
86 : $$
87 : com_\alpha(X')= \sum_i \frac{w'_{i}}{\sum_j w'_j}X' _{i,\alpha}
88 : $$
89 :
90 : Obviously, $com_\alpha(X)$ and $com_\alpha(X')$ represent the positions of the center of mass in the reference
91 : and instantaneous configurations if the weights $w'$ are set equal to the atomic masses. If the weights are all set equal to
92 : one, however, $com_\alpha(X)$ and $com_\alpha(X')$ are the positions of the geometric centers.
93 :
94 : An example input that can be used to calculate and print this RMSD distance is shown below:
95 :
96 : ```plumed
97 : #SETTINGS INPUTFILES=regtest/basic/rt19/test0.pdb
98 : rmsd0: RMSD TYPE=SIMPLE REFERENCE=regtest/basic/rt19/test0.pdb
99 : PRINT ARG=rmsd0 FILE=colvar
100 : ```
101 :
102 : Notice that there are sets of weights: $w'$ and $w$ in the formulas above. The first of these weights is used to calculate the position of the center of mass
103 : (so it determines how the atoms are _aligned_). The second set of weights, $w$ is used when calculating how far the atoms have been
104 : _displaced_. These weights are assigned in the reference configuration that you provide as input (i.e. the appear in the input file
105 : to this action that you set using REFERENCE=whatever.pdb). As you can see in the input above, this input consists of a simple pdb file
106 : containing the set of atoms for which you want to calculate the RMSD displacement and their positions in the reference configuration.
107 : It is important to note that the indices in this pdb need to be set correctly. The indices in this file determine the indices of the
108 : instantaneous atomic positions that are used by PLUMED when calculating this colvar. As such if you want to calculate the RMSD distance
109 : moved by the first, fourth, sixth and twenty eighth atoms in the MD codes input file then the indices of the corresponding reference positions in this pdb
110 : file should be set equal to 1, 4, 6 and 28.
111 :
112 : The pdb input file should also contain the weights $w$ and $w'$. These second of these sets of weights, $w'$, appears in the OCCUPANCY column (the first column after the coordinates).
113 : These are the weights that are used to calculate the position of the center of mass. The BETA column (the second column
114 : after the Cartesian coordinates) is used to provide the $w$ values which are used in the the calculation of the displacement.
115 : Please note that it is possible to use fractional values for beta and for the occupancy. However, we recommend you only do this when
116 : you really know what you are doing however as the results can be rather strange. A more common practise is to use different sets of atoms
117 : for the alignment and the displacement. You can do this by setting the $w$ and $w'$ values for all the atoms you wish to use for alignment only equal to zero and
118 : one respectively and by setting the $w$ and $w'$ values for all the atoms you wish to use for displacement only to one and zero respectively.
119 :
120 : In the PDB input files that you use for RMSD the atomic coordinates and box lengths should be in Angstroms unless
121 : you are working with natural units. If you are working with natural units then the coordinates
122 : should be in your natural length unit. You can find more details on the PDB file format [here](http://www.wwpdb.org/docs.html).
123 : Please make sure your PDB file is correctly formatted. More detail on the format for PDB files can be found in the documentation for the [PDB2CONSTANT](PDB2CONSTANT.md) action.
124 :
125 : The following input uses a different method to calculate the RMSD distance as you can tell from the TYPE=OPTIMAL on the input line.
126 :
127 : ```plumed
128 : #SETTINGS INPUTFILES=regtest/basic/rt19/test0.pdb
129 : rmsd0: RMSD TYPE=OPTIMAL REFERENCE=regtest/basic/rt19/test0.pdb
130 : PRINT ARG=rmsd0 FILE=colvar
131 : ```
132 :
133 : In this case the root mean square deviation is calculated after the positions of geometric centers in the reference and instantaneous configurations are aligned AND after
134 : an optimal alignment of the two frames is performed so that motion due to rotation of the reference frame between the two structures is
135 : removed. The equation for $d(X,X')$ in this case is:
136 :
137 : $$
138 : d(X,X') = \sqrt{ \sum_i \sum_\alpha^{x,y,z} \frac{w_i}{\sum_j w_j}[ X_{i,\alpha}-com_\alpha(X)- \sum_\beta M(X,X',w')_{\alpha,\beta}({X'}_{i,\beta}-com_\beta(X')) ]^2 }
139 : $$
140 :
141 : where $M(X,X',w')$ is the optimal alignment matrix which is calculated using the Kearsley algorithm that is described in the paper from the bibliography below. Again different sets of
142 : weights are used for the alignment ($w'$) and for the displacement calculations ($w$).
143 : This gives a great deal of flexibility as it allows you to use a different sets of atoms (which may or may not overlap) for the alignment and displacement
144 : parts of the calculation. This may be very useful when you want to calculate how a ligand moves about in a protein cavity as you can use the protein as a reference
145 : system and do no alignment of the ligand.
146 :
147 : (Note: when this form of RMSD is used to calculate the secondary structure variables ([ALPHARMSD](ALPHARMSD.md), [ANTIBETARMSD](ANTIBETARMSD.md) and [PARABETARMSD](PARABETARMSD.md)
148 : all the atoms in the segment are assumed to be part of both the alignment and displacement sets and all weights are set equal to one)
149 :
150 : The $d(X,X')$ values that are calculated when you use the TYPE=SIMPLE and TYPE=OPTIMAL variants of RMSD are scalars. These scalars tell you the length of the vector of displacements,
151 : $X - X'$, between the instantaneous and reference positions. If you would like to access this vector of displacements instead of its length you can by using the DISPLACEMENT keyword
152 : as shown below for TYPE=SIMPLE
153 :
154 : ```plumed
155 : #SETTINGS INPUTFILES=regtest/basic/rt19/test0.pdb
156 : rmsd0: RMSD TYPE=SIMPLE DISPLACEMENT REFERENCE=regtest/basic/rt19/test0.pdb
157 : PRINT ARG=rmsd0.disp,rmsd0.dist FILE=colvar
158 : ```
159 :
160 : or as shown below for TYPE=OPTIMAL
161 :
162 : ```plumed
163 : #SETTINGS INPUTFILES=regtest/basic/rt19/test0.pdb
164 : rmsd0: RMSD TYPE=OPTIMAL DISPLACEMENT REFERENCE=regtest/basic/rt19/test0.pdb
165 : PRINT ARG=rmsd0.disp,rmsd0.dist FILE=colvar
166 : ```
167 :
168 : The RMSD command for these inputs output two components
169 :
170 : - `dist` - the length of displacement vector that is output when you don't use the DISPLACEMENT keyword
171 : - `disp` - the 3N dimensional vector of atomic dispacements, where N is the number of atoms.
172 :
173 : These vectors of displacements are used if you use the [PCAVARS](PCAVARS.md) action to compute the projection of the displacement on a particular reference vector.
174 :
175 : You can also define multiple reference configurations in the reference input as is done in the following example:
176 :
177 : ```plumed
178 : #SETTINGS INPUTFILES=regtest/mapping/rt39/all1.pdb
179 : rmsd: RMSD TYPE=OPTIMAL REFERENCE=regtest/mapping/rt39/all1.pdb
180 : PRINT ARG=rmsd FILE=colvar
181 : ```
182 :
183 : The output from RMSD in this case is a vector that contains the RMSD distances from each of the reference configurations in your input file. This feature is used in the
184 : [PATH](PATH.md) shortcut. Furthermore, you can use the DISPLACEMENT keyword when there are multiple reference configurations in the input file as shown below:
185 :
186 : ```plumed
187 : #SETTINGS INPUTFILES=regtest/mapping/rt39/all1.pdb
188 : rmsd: RMSD TYPE=OPTIMAL DISPLACEMENT REFERENCE=regtest/mapping/rt39/all1.pdb
189 : PRINT ARG=rmsd.disp,rmsd.dist FILE=colvar
190 : ```
191 :
192 : The RMSD command here still outputs the `dist` and `disp` components but now `dist` is a vector and `disp` is a matrix. This type of command is used to implement
193 : the [GEOMETRIC_PATH](GEOMETRIC_PATH.md) shortcut. For this command you need information on the distances from a set of reference configurations that can be found
194 : in the `dist` component as well as the information on the displacement vectors between the instantaneous position and each of the reference configurations that is contained in the `dist` matrix.
195 :
196 : Please note that there are a number of other methods for calculating the distance between the instantaneous configuration and a reference configuration that are available in plumed.
197 :
198 : ## A note on periodic boundary conditions
199 :
200 : When periodic boundary conditions are used, the atoms should be
201 : in the proper periodic image. This has been done automatically since PLUMED 2.5,
202 : by considering the ordered list of atoms and rebuilding molecules using a procedure
203 : that is equivalent to that done in [WHOLEMOLECULES](WHOLEMOLECULES.md). Notice that
204 : rebuilding is local to this action. This is different from [WHOLEMOLECULES](WHOLEMOLECULES.md)
205 : which actually modifies the coordinates stored in PLUMED.
206 :
207 : In case you want to recover the old behavior you should use the NOPBC flag.
208 : In that case you need to take care that atoms are in the correct
209 : periodic image.
210 :
211 : */
212 : //+ENDPLUMEDOC
213 :
214 : PLUMED_REGISTER_ACTION(RMSD,"RMSD_SCALAR")
215 :
216 165 : void RMSD::registerKeywords(Keywords& keys) {
217 165 : Colvar::registerKeywords(keys);
218 330 : keys.setDisplayName("RMSD");
219 165 : keys.add("compulsory","REFERENCE","a file in pdb format containing the reference structure and the atoms involved in the CV.");
220 165 : keys.add("compulsory","TYPE","SIMPLE","the manner in which RMSD alignment is performed. Should be OPTIMAL or SIMPLE.");
221 165 : keys.addFlag("SQUARED",false," This should be set if you want mean squared displacement instead of RMSD ");
222 330 : keys.setValueDescription("scalar","the RMSD between the instantaneous structure and the reference structure that was input");
223 165 : }
224 :
225 82 : RMSD::RMSD(const ActionOptions&ao):
226 : PLUMED_COLVAR_INIT(ao),
227 82 : squared(false),
228 82 : nopbc(false) {
229 : std::string reference;
230 165 : parse("REFERENCE",reference);
231 : std::string type;
232 82 : type.assign("SIMPLE");
233 82 : parse("TYPE",type);
234 82 : parseFlag("SQUARED",squared);
235 82 : parseFlag("NOPBC",nopbc);
236 82 : checkRead();
237 :
238 83 : addValueWithDerivatives();
239 82 : setNotPeriodic();
240 82 : PDB pdb;
241 :
242 : // read everything in ang and transform to nm if we are not in natural units
243 82 : if( !pdb.read(reference,usingNaturalUnits(),0.1/getUnits().getLength()) ) {
244 0 : error("missing input file " + reference );
245 : }
246 82 : myrmsd.set( pdb, type, true, true );
247 :
248 82 : std::vector<AtomNumber> atoms( pdb.getAtomNumbers() );
249 82 : requestAtoms( atoms );
250 81 : der.resize( atoms.size() );
251 :
252 81 : log.printf(" reference from file %s\n",reference.c_str());
253 81 : log.printf(" which contains %d atoms\n",getNumberOfAtoms());
254 81 : log.printf(" with indices : ");
255 3991 : for(unsigned i=0; i<atoms.size(); ++i) {
256 3910 : if(i%25==0) {
257 188 : log<<"\n";
258 : }
259 3910 : log.printf("%d ",atoms[i].serial());
260 : }
261 81 : log.printf("\n");
262 81 : log.printf(" method for alignment : %s \n",type.c_str() );
263 81 : if(squared) {
264 11 : log.printf(" chosen to use SQUARED option for MSD instead of RMSD\n");
265 : }
266 81 : if(nopbc) {
267 61 : log.printf(" without periodic boundary conditions\n");
268 : } else {
269 20 : log.printf(" using periodic boundary conditions\n");
270 : }
271 166 : }
272 :
273 :
274 : // calculator
275 40518 : void RMSD::calculate() {
276 40518 : if(!nopbc) {
277 3956 : makeWhole();
278 : }
279 40518 : double r=myrmsd.calculate( getPositions(), der, squared );
280 :
281 40518 : setValue(r);
282 14618145 : for(unsigned i=0; i<getNumberOfAtoms(); i++) {
283 14577627 : setAtomsDerivatives( i, der[i] );
284 : }
285 40518 : setBoxDerivativesNoPbc();
286 40518 : }
287 :
288 : }
289 : }
290 :
291 :
292 :
|