Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2012-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionShortcut.h"
23 : #include "core/ActionRegister.h"
24 : #include "core/ActionWithArguments.h"
25 : #include "core/PlumedMain.h"
26 : #include "tools/PDB.h"
27 :
28 : namespace PLMD {
29 : namespace generic {
30 :
31 : //+PLUMEDOC COLVAR PDB2CONSTANT
32 : /*
33 : Create a constant value from a PDB input file
34 :
35 : This shortcut converts the contents of a PDB file to one or more [CONSTANT](CONSTANT.md) actions.
36 : Converting PDB files to Values in this way is useful because it means that when we implement methods, like those
37 : in the [refdist](module_refdist.md) or [mapping](module_mapping.md) modules or the [RMSD](RMSD.md) action, that calculate the distance between two
38 : configurations those two configurations are both stored in PLUMED values. The same code can thus be used to
39 : calculate the difference between the instantaneous configuration
40 : and a constant reference configuration that was read from a file or between two reference configuration.
41 :
42 : The following example illustrates how this action can be used to read a set of reference atomic positions
43 :
44 : ```plumed
45 : #SETTINGS INPUTFILES=regtest/basic/rt19/test0.pdb
46 : ref: PDB2CONSTANT REFERENCE=regtest/basic/rt19/test0.pdb
47 : ```
48 :
49 : You can see how the reference positions are converted to [CONSTANT](CONSTANT.md) action that outputs a vector by expanding the shortcut.
50 :
51 : You can also use this command to read in multiple reference positions as illustrated below:
52 :
53 : ```plumed
54 : #SETTINGS INPUTFILES=regtest/mapping/rt-pathtools-3/all.pdb
55 : ref: PDB2CONSTANT REFERENCE=regtest/mapping/rt-pathtools-3/all.pdb
56 : ```
57 :
58 : The [CONSTANT](CONSTANT.md) that is created by this action is a matrix. Each row of the output matrix contains one set of reference positions.
59 : Notice also that if you have a PDB input which contains multiple reference configurations you can create a vector constant by using the `NUMBER`
60 : keyword to specify the particular configuration that you would like to read in as shown below:
61 :
62 : ```plumed
63 : #SETTINGS INPUTFILES=regtest/mapping/rt-pathtools-3/all.pdb
64 : ref: PDB2CONSTANT REFERENCE=regtest/mapping/rt-pathtools-3/all.pdb NUMBER=4
65 : ```
66 :
67 : The input above will reads in the fourth configuration in the input PDB file only.
68 :
69 : ## The PDB file format
70 :
71 : PLUMED uses the PDB file format here and in several other places
72 :
73 : - To read the molecular structure ([MOLINFO](MOLINFO.md)).
74 : - To read reference conformations ([RMSD](RMSD.md), but also in methods such as [FIT_TO_TEMPLATE](FIT_TO_TEMPLATE.md), etc).
75 :
76 : The implemented PDB reader expects a file formatted correctly according to the
77 : [PDB standard](http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html).
78 : In particular, the following columns are read from ATOM records
79 :
80 : ````
81 : columns | content
82 : 1-6 | record name (ATOM or HETATM)
83 : 7-11 | serial number of the atom (starting from 1)
84 : 13-16 | atom name
85 : 18-20 | residue name
86 : 22 | chain id
87 : 23-26 | residue number
88 : 31-38 | x coordinate
89 : 39-46 | y coordinate
90 : 47-54 | z coordinate
91 : 55-60 | occupancy
92 : 61-66 | beta factor
93 : ````
94 :
95 : The PLUMED parser is slightly more permissive than the official PDB format
96 : in the fact that the format of real numbers is not fixed. In other words,
97 : any real number that can be parsed is OK and the dot can be placed anywhere. However,
98 : __columns are interpret strictly__. A sample PDB should look like the following
99 :
100 : ````
101 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
102 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
103 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 1.00 1.00
104 : ````
105 :
106 : Notice that serial numbers need not to be consecutive. In the three-line example above,
107 : only the coordinates of three atoms are provided. This is perfectly legal and indicates to PLUMED
108 : that information about these atoms only is available. This could be both for structural
109 : information in [MOLINFO](MOLINFO.md), where the other atoms would have no name assigned, and for
110 : reference structures used in [RMSD](RMSD.md), where only the provided atoms would be used to compute RMSD.
111 :
112 : ## Including arguments in PDB files
113 :
114 : If you wish to specify reference values for PLUMED Values in the REMARKS of a PLUMED input file like this:
115 :
116 : ````
117 : REMARK t1=-4.3345
118 : REMARK t2=3.4725
119 : END
120 : ````
121 :
122 : You can read in these reference values by using the PDB2CONSTANT command as follows:
123 :
124 : ```plumed
125 : #SETTINGS INPUTFILES=regtest/mapping/rt-pathtools-4/epath.pdb
126 : t1: TORSION ATOMS=1,2,3,4
127 : t2: TORSION ATOMS=5,6,7,8
128 : t1_ref: PDB2CONSTANT REFERENCE=regtest/mapping/rt-pathtools-4/epath.pdb ARG=t1
129 : t2_ref: PDB2CONSTANT REFERENCE=regtest/mapping/rt-pathtools-4/epath.pdb ARG=t2
130 : ```
131 :
132 : Notice that the input must define values with the labels that are being read in from the reference file
133 : and that separate PDB2CONSTANT commands are required for reading in `t1` and `t2`. Furthermore, because the
134 : input PDB file contains multiple frames vectors containing all the values for `t1` and `t2` are output from
135 : the constant commands that are created by the shortcuts in the above input. If you want to read only one of the
136 : configurations in the input PDB file you can use a pdb with a single frame or the `NUMBER` keyword described above.
137 :
138 : ## Occupancy and beta factors
139 :
140 : PLUMED also reads the occupancy and beta factors from the input PDB files. However, these columns of data are
141 : given a very special meaning.
142 : In cases where the PDB structure is used as a reference for an alignment (that's the case
143 : for instance in [RMSD](RMSD.md) and in [FIT_TO_TEMPLATE](FIT_TO_TEMPLATE.md)), the occupancy column is used
144 : to provide the weight of each atom in the alignment. In cases where, perhaps after alignment,
145 : the displacement between running coordinates and the provided PDB is computed, the beta factors
146 : are used as weight for the displacement.
147 : Since setting the weights to zero is the same as __not__ including an atom in the alignment or
148 : displacement calculation, the two following reference files would be equivalent when used in an [RMSD](RMSD.md)
149 : calculation. First file:
150 : \verbatim
151 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
152 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
153 : ATOM 9 CA ALA 2 19.462 -11.088 -8.986 0.00 0.00
154 : \endverbatim
155 : Second file:
156 : \verbatim
157 : ATOM 2 CH3 ACE 1 12.932 -14.718 -6.016 1.00 1.00
158 : ATOM 5 C ACE 1 21.312 -9.928 -5.946 1.00 1.00
159 : \endverbatim
160 : However notice that many extra atoms with zero weight might slow down the calculation, so
161 : removing lines is better than setting their weights to zero.
162 : In addition, weights for alignment need not to be equivalent to weights for displacement.
163 : Starting with PLUMED 2.7, if all the weights are set to zero they will be normalized to be equal to the
164 : inverse of the number of involved atoms. This means that it will be possible to use files with
165 : the weight columns set to zero obtaining a meaningful result. In previous PLUMED versions,
166 : setting all weights to zero was resulting in an error instead.
167 :
168 : ## Systems with more than 100k atoms
169 :
170 : Notice that it very likely does not make any sense to compute the [RMSD](RMSD.md) or any other structural
171 : deviation __using__ many atoms. However, if the protein for which you want to compute [RMSD](RMSD.md)
172 : has atoms with large serial numbers (e.g. because it is located __after__ solvent in the sorted list of atoms)
173 : you might end up with troubles with the limitations of the PDB format. Indeed, since there are 5
174 : columns available for atom serial number, this number cannot be larger than 99999.
175 : In addition, providing [MOLINFO](MOLINFO.md) with names associated to atoms with a serial larger than 99999 would be impossible.
176 :
177 : Since PLUMED 2.4 we allow the [hybrid 36](http://cci.lbl.gov/hybrid_36/) format to be used to specify atom numbers.
178 : This format is not particularly widespread, but has the nice feature that it provides a one-to-one mapping
179 : between numbers up to approximately 80 millions and strings with 5 characters, plus it is backward compatible
180 : for numbers smaller than 100000. This is not true for notations like the hex notation exported by VMD.
181 : Using the hybrid 36 format, the ATOM records for atom ranging from 99997 to 100002 would read like these:
182 : \verbatim
183 : ATOM 99997 Ar X 1 45.349 38.631 15.116 1.00 1.00
184 : ATOM 99998 Ar X 1 46.189 38.631 15.956 1.00 1.00
185 : ATOM 99999 Ar X 1 46.189 39.471 15.116 1.00 1.00
186 : ATOM A0000 Ar X 1 45.349 39.471 15.956 1.00 1.00
187 : ATOM A0000 Ar X 1 45.349 38.631 16.796 1.00 1.00
188 : ATOM A0001 Ar X 1 46.189 38.631 17.636 1.00 1.00
189 : \endverbatim
190 : There are tools that can be found to translate from integers to strings and back using hybrid 36 format
191 : (a simple python script can be found [here](https://sourceforge.net/p/cctbx/code/HEAD/tree/trunk/iotbx/pdb/hybrid_36.py)).
192 : In addition, as of PLUMED 2.5, we provide a command line tool that can be used to renumber atoms in a PDB file.
193 :
194 : */
195 : //+ENDPLUMEDOC
196 :
197 : class PDB2Constant : public ActionShortcut {
198 : public:
199 : static void registerKeywords(Keywords& keys);
200 : explicit PDB2Constant(const ActionOptions&);
201 : };
202 :
203 : PLUMED_REGISTER_ACTION(PDB2Constant,"PDB2CONSTANT")
204 :
205 168 : void PDB2Constant::registerKeywords(Keywords& keys) {
206 168 : ActionShortcut::registerKeywords( keys );
207 168 : keys.add("compulsory","REFERENCE","a file in pdb format containing the reference structure");
208 168 : keys.add("compulsory","NUMBER","0","if there are multiple structures in the pdb file you can specify that you want the RMSD from a specific structure by specifying its place in the file here. If NUMBER=0 then the RMSD from all structures are computed");
209 168 : keys.addFlag("NOARGS",false,"the arguments that are being read from the PDB file are not in the plumed input");
210 336 : keys.addInputKeyword("optional","ARG","scalar/vector","read this single argument from the input rather than the atomic structure");
211 336 : keys.setValueDescription("scalar/vector/matrix","a value that is constructed from the information in the PDB file");
212 168 : keys.needsAction("CONSTANT");
213 168 : }
214 :
215 115 : PDB2Constant::PDB2Constant(const ActionOptions& ao):
216 : Action(ao),
217 115 : ActionShortcut(ao) {
218 : std::string input;
219 115 : parse("REFERENCE",input);
220 : unsigned frame;
221 115 : parse("NUMBER",frame);
222 115 : bool noargs=false;
223 : std::vector<std::string> argn;
224 230 : parseVector("ARG",argn);
225 : std::vector<Value*> theargs;
226 115 : if( argn.size()>0 ) {
227 75 : parseFlag("NOARGS",noargs);
228 75 : if( !noargs ) {
229 73 : ActionWithArguments::interpretArgumentList( argn, plumed.getActionSet(), this, theargs );
230 2 : } else if( argn.size()>1 ) {
231 0 : error("can only read one argument at a time from input pdb file");
232 : } else {
233 2 : log.printf(" reading argument %s from file \n", argn[0].c_str() );
234 : }
235 : }
236 115 : if( theargs.size()>1 ) {
237 0 : error("can only read one argument at a time from input pdb file");
238 : }
239 :
240 115 : FILE* fp=std::fopen(input.c_str(),"r");
241 : bool do_read=true;
242 : std::vector<double> vals;
243 115 : if(!fp) {
244 0 : plumed_merror("could not open reference file " + input);
245 : }
246 115 : unsigned natoms=0, nframes=0;
247 :
248 2235 : while ( do_read ) {
249 2235 : PDB mypdb;
250 2235 : do_read=mypdb.readFromFilepointer(fp,plumed.usingNaturalUnits(),0.1/plumed.getUnits().getLength());
251 2235 : if( !do_read && nframes>0 ) {
252 : break ;
253 : }
254 :
255 2120 : if( natoms==0 ) {
256 1378 : natoms = mypdb.getPositions().size();
257 742 : } else if( mypdb.getPositions().size()!=natoms ) {
258 0 : plumed_merror("mismatch between sizes of reference configurations");
259 : }
260 :
261 2120 : if( nframes+1==frame || frame==0 ) {
262 1642 : std::vector<double> align( mypdb.getOccupancy() );
263 : double asum=0;
264 10326 : for(unsigned i=0; i<align.size(); ++i) {
265 8684 : asum += align[i];
266 : }
267 1642 : if( asum>epsilon ) {
268 674 : double iasum = 1 / asum;
269 9358 : for(unsigned i=0; i<align.size(); ++i) {
270 8684 : align[i] *= iasum;
271 : }
272 968 : } else if( mypdb.size()>0 ) {
273 0 : double iasum = 1 / mypdb.size();
274 0 : for(unsigned i=0; i<align.size(); ++i) {
275 0 : align[i] = iasum;
276 : }
277 : }
278 1642 : Vector center;
279 1642 : center.zero();
280 10326 : for(unsigned i=0; i<mypdb.getPositions().size(); ++i) {
281 8684 : center += align[i]*mypdb.getPositions()[i];
282 : }
283 :
284 1642 : if( theargs.size()==0 && argn.size()==0 ) {
285 1688 : for(unsigned j=0; j<3; ++j) {
286 17490 : for(unsigned i=0; i<mypdb.getPositions().size(); ++i) {
287 16224 : vals.push_back( mypdb.getPositions()[i][j] - center[j] );
288 : }
289 : }
290 1220 : } else if( noargs ) {
291 20 : std::vector<double> argvals( 1 );
292 20 : if( !mypdb.getArgumentValue(argn[0], argvals ) ) {
293 0 : error("argument " + argn[0] + " was not set in pdb input");
294 : }
295 20 : vals.push_back( argvals[0] );
296 : } else {
297 1200 : std::vector<double> argvals( theargs[0]->getNumberOfValues() );
298 1200 : if( !mypdb.getArgumentValue(theargs[0]->getName(), argvals ) ) {
299 0 : error("argument " + theargs[0]->getName() + " was not set in pdb input");
300 : }
301 2402 : for(unsigned i=0; i<argvals.size(); ++i) {
302 1202 : vals.push_back( argvals[i] );
303 : }
304 : }
305 : }
306 2120 : nframes++;
307 2235 : }
308 115 : if( frame>0 ) {
309 68 : nframes=1;
310 : }
311 115 : std::fclose(fp);
312 : std::string rnum;
313 115 : plumed_assert( vals.size()>0 );
314 115 : Tools::convert( vals[0], rnum );
315 115 : std::string valstr = " VALUES=" + rnum;
316 17446 : for(unsigned i=1; i<vals.size(); ++i) {
317 17331 : Tools::convert( vals[i], rnum );
318 34662 : valstr += "," + rnum;
319 : }
320 115 : if( vals.size()>nframes ) {
321 : std::string nc, nr;
322 41 : Tools::convert( nframes, nr );
323 41 : Tools::convert( vals.size()/nframes, nc );
324 82 : readInputLine( getShortcutLabel() + ": CONSTANT NROWS=" + nr + " NCOLS=" + nc + valstr );
325 : } else {
326 148 : readInputLine( getShortcutLabel() + ": CONSTANT" + valstr );
327 : }
328 230 : }
329 :
330 : }
331 : }
|