Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2012-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "GenericMolInfo.h"
23 : #include "ActionRegister.h"
24 : #include "ActionSet.h"
25 : #include "PlumedMain.h"
26 : #include "tools/MolDataClass.h"
27 : #include "tools/PDB.h"
28 : #include "tools/Communicator.h"
29 : #include "config/Config.h"
30 :
31 : namespace PLMD {
32 :
33 :
34 : /*
35 : This action is defined in core/ as it is used by other actions.
36 : Anyway, it is registered in setup/, so that excluding that module from
37 : compilation will exclude it from plumed.
38 : */
39 :
40 :
41 120 : void GenericMolInfo::registerKeywords( Keywords& keys ) {
42 120 : ActionSetup::registerKeywords(keys);
43 120 : keys.add("compulsory","STRUCTURE","a file in pdb format containing a reference structure. "
44 : "This is used to defines the atoms in the various residues, chains, etc . "
45 : "For more details on the PDB file format visit http://www.wwpdb.org/docs.html");
46 120 : keys.add("compulsory","MOLTYPE","protein","what kind of molecule is contained in the pdb file - usually not needed since protein/RNA/DNA are compatible");
47 120 : keys.add("compulsory","PYTHON_BIN","default","python interpreter");
48 120 : keys.add("atoms","CHAIN","(for masochists ( mostly Davide Branduardi ) ) The atoms involved in each of the chains of interest in the structure.");
49 120 : keys.add("hidden","STRIDE","frequency for resetting the python interpreter. Should be 1.");
50 120 : keys.addFlag("WHOLE", false, "The reference structure is whole, i.e. not broken by PBC");
51 120 : }
52 :
53 236 : GenericMolInfo::~GenericMolInfo() {
54 : // empty destructor to delete unique_ptr
55 354 : }
56 :
57 118 : GenericMolInfo::GenericMolInfo( const ActionOptions&ao ):
58 : Action(ao),
59 : ActionAnyorder(ao),
60 : ActionPilot(ao),
61 : ActionAtomistic(ao),
62 118 : iswhole_(false) {
63 118 : plumed_assert(getStride()==1);
64 : // Read what is contained in the pdb file
65 118 : parse("MOLTYPE",mytype);
66 :
67 : // check if whole
68 118 : parseFlag("WHOLE", iswhole_);
69 :
70 118 : auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
71 118 : if( moldat ) {
72 4 : log<<" overriding last MOLINFO with label " << moldat->getLabel()<<"\n";
73 : }
74 :
75 : std::vector<AtomNumber> backbone;
76 236 : parseAtomList("CHAIN",backbone);
77 118 : if( read_backbone.size()==0 ) {
78 0 : for(unsigned i=1;; ++i) {
79 236 : parseAtomList("CHAIN",i,backbone);
80 118 : if( backbone.size()==0 ) {
81 : break;
82 : }
83 0 : read_backbone.push_back(backbone);
84 0 : backbone.resize(0);
85 : }
86 : } else {
87 0 : read_backbone.push_back(backbone);
88 : }
89 118 : if( read_backbone.size()==0 ) {
90 118 : parse("STRUCTURE",reference);
91 :
92 118 : if( ! pdb.read(reference,usingNaturalUnits(),0.1/getUnits().getLength())) {
93 0 : plumed_merror("missing input file " + reference );
94 : }
95 :
96 : std::vector<std::string> chains;
97 118 : pdb.getChainNames( chains );
98 118 : log.printf(" pdb file named %s contains %u chains \n",reference.c_str(), static_cast<unsigned>(chains.size()));
99 346 : for(unsigned i=0; i<chains.size(); ++i) {
100 : unsigned start,end;
101 : std::string errmsg;
102 228 : pdb.getResidueRange( chains[i], start, end, errmsg );
103 228 : if( errmsg.length()!=0 ) {
104 0 : error( errmsg );
105 : }
106 : AtomNumber astart,aend;
107 228 : pdb.getAtomRange( chains[i], astart, aend, errmsg );
108 228 : if( errmsg.length()!=0 ) {
109 0 : error( errmsg );
110 : }
111 228 : log.printf(" chain named %s contains residues %u to %u and atoms %u to %u \n",chains[i].c_str(),start,end,astart.serial(),aend.serial());
112 : }
113 :
114 : std::string python_bin;
115 236 : parse("PYTHON_BIN",python_bin);
116 118 : if(python_bin=="no") {
117 0 : log<<" python interpreter disabled\n";
118 : } else {
119 236 : pythonCmd=config::getEnvCommand();
120 118 : if(python_bin!="default") {
121 0 : log<<" forcing python interpreter: "<<python_bin<<"\n";
122 0 : pythonCmd+=" env PLUMED_PYTHON_BIN="+python_bin;
123 : }
124 : bool sorted=true;
125 118 : const auto & at=pdb.getAtomNumbers();
126 112805 : for(unsigned i=0; i<at.size(); i++) {
127 112687 : if(at[i].index()!=i) {
128 : sorted=false;
129 : }
130 : }
131 118 : if(!sorted) {
132 2 : log<<" PDB is not sorted, python interpreter will be disabled\n";
133 116 : } else if(!Subprocess::available()) {
134 0 : log<<" subprocess is not available, python interpreter will be disabled\n";
135 : } else {
136 116 : enablePythonInterpreter=true;
137 : }
138 : }
139 118 : }
140 118 : }
141 :
142 1 : std::map<std::string,std::string> GenericMolInfo::getSpecialKeywords() {
143 : std::map<std::string,std::string> mapkeys;
144 1 : mapkeys.insert( std::pair<std::string,std::string>("@mda:","atom selection built using syntax for <a href=\\\"https://docs.mdanalysis.org/stable/documentation_pages/selections.html\\\">MDAnalysis</a>") );
145 1 : mapkeys.insert( std::pair<std::string,std::string>("@mdt:","atom selection built using syntax for <a href=\\\"https://www.mdtraj.org/1.9.8.dev0/index.html\\\">mdtraj</a>") );
146 1 : mapkeys.insert( std::pair<std::string,std::string>("@vmdexec:","atom selection built using syntax for <a href=\\\"https://www.ks.uiuc.edu/Research/vmd/\\\">VMD</a>") );
147 1 : mapkeys.insert( std::pair<std::string,std::string>("@vmd:","atom selection built using syntax for <a href=\\\"https://www.ks.uiuc.edu/Research/vmd/\\\">VMD</a> python module") );
148 1 : mapkeys.insert( std::pair<std::string,std::string>("@nucleic","all atoms that are part of a DNA or RNA molecule") );
149 1 : mapkeys.insert( std::pair<std::string,std::string>("@protein","all atoms that are part of a protein") );
150 1 : mapkeys.insert( std::pair<std::string,std::string>("@water","all water molecules") );
151 1 : mapkeys.insert( std::pair<std::string,std::string>("@ions","all the ions") );
152 1 : mapkeys.insert( std::pair<std::string,std::string>("@hydrogens","all hydrogen atoms") );
153 1 : mapkeys.insert( std::pair<std::string,std::string>("@nonhydrogens","all non hydrogen atoms") );
154 1 : mapkeys.insert( std::pair<std::string,std::string>("@phi-","the four atoms that are required to calculate the phi dihedral for") );
155 1 : mapkeys.insert( std::pair<std::string,std::string>("@psi-","the four atoms that are required to calculate the psi dihedral for") );
156 1 : mapkeys.insert( std::pair<std::string,std::string>("@omega-","the four atoms that are required to calculate the omega dihedral for") );
157 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi1-","the four atoms that are required to calculate the chi1 dihedral for") );
158 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi2-","the four atoms that are required to calculate the chi2 dihedral for") );
159 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi3-","the four atoms that are required to calculate the chi3 dihedral for") );
160 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi4-","the four atoms that are required to calculate the chi4 dihedral for") );
161 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi5-","the four atoms that are required to calculate the chi5 dihedral for") );
162 1 : mapkeys.insert( std::pair<std::string,std::string>("@sidechain-","the protein sidechain atoms in") );
163 1 : mapkeys.insert( std::pair<std::string,std::string>("@back-","the protein/dna/rna backbone atoms in") );
164 1 : mapkeys.insert( std::pair<std::string,std::string>("@alpha-","the four atoms that are required to calculate the alpha backbone dihedral for") );
165 1 : mapkeys.insert( std::pair<std::string,std::string>("@beta-","the four atoms that are required to calculate the beta backbone dihedral for") );
166 1 : mapkeys.insert( std::pair<std::string,std::string>("@gamma-","the four atoms that are required to calculate the gamma backbone dihedral for") );
167 1 : mapkeys.insert( std::pair<std::string,std::string>("@delta-","the four atoms that are required to calculate the delta backbone dihedral for") );
168 1 : mapkeys.insert( std::pair<std::string,std::string>("@epsilon-","the four atoms that are required to calculate the backbone epsilon dihedral for") );
169 1 : mapkeys.insert( std::pair<std::string,std::string>("@zeta-","the four atoms that are required to calculate the zeta backbone dihedral for") );
170 1 : mapkeys.insert( std::pair<std::string,std::string>("@v0-","the four atoms that are required to calculate the v0 sugar dihedral for") );
171 1 : mapkeys.insert( std::pair<std::string,std::string>("@v1-","the four atoms that are required to calculate the v1 sugar dihedral for") );
172 1 : mapkeys.insert( std::pair<std::string,std::string>("@v2-","the four atoms that are required to calculate the v2 sugar dihedral for") );
173 1 : mapkeys.insert( std::pair<std::string,std::string>("@v3-","the four atoms that are required to calculate the v3 sugar dihedral for") );
174 1 : mapkeys.insert( std::pair<std::string,std::string>("@v4-","the four atoms that are required to calculate the v4 sugar dihedral for") );
175 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi-","the four atoms that are required to alcullate the chi dihedral for") );
176 1 : mapkeys.insert( std::pair<std::string,std::string>("@sugar-","the heavy atoms of the sugar in") );
177 1 : mapkeys.insert( std::pair<std::string,std::string>("@base-","the heavy atoms of the base in") );
178 1 : mapkeys.insert( std::pair<std::string,std::string>("@lcs-","an ordered triplet of atoms on the 6-membered ring of the nucleobase in") );
179 1 : return mapkeys;
180 : }
181 :
182 38 : void GenericMolInfo::getBackbone( std::vector<std::string>& restrings, const std::string& fortype, std::vector< std::vector<AtomNumber> >& backbone ) {
183 38 : if( fortype!=mytype ) {
184 0 : error("cannot calculate a variable designed for " + fortype + " molecules for molecule type " + mytype );
185 : }
186 38 : if( MolDataClass::numberOfAtomsPerResidueInBackbone( mytype )==0 ) {
187 0 : error("backbone is not defined for molecule type " + mytype );
188 : }
189 :
190 38 : if( read_backbone.size()!=0 ) {
191 0 : if( restrings.size()!=1 ) {
192 0 : error("cannot interpret anything other than all for residues when using CHAIN keywords");
193 : }
194 0 : if( restrings[0]!="all" ) {
195 0 : error("cannot interpret anything other than all for residues when using CHAIN keywords");
196 : }
197 0 : backbone.resize( read_backbone.size() );
198 0 : for(unsigned i=0; i<read_backbone.size(); ++i) {
199 0 : backbone[i].resize( read_backbone[i].size() );
200 0 : for(unsigned j=0; j<read_backbone[i].size(); ++j) {
201 0 : backbone[i][j]=read_backbone[i][j];
202 : }
203 : }
204 : } else {
205 : bool useter=false; // This is used to deal with terminal groups in WHOLEMOLECULES
206 38 : if( restrings.size()==1 ) {
207 37 : useter=( restrings[0].find("ter")!=std::string::npos );
208 37 : if( restrings[0].find("all")!=std::string::npos ) {
209 : std::vector<std::string> chains;
210 36 : pdb.getChainNames( chains );
211 582 : for(unsigned i=0; i<chains.size(); ++i) {
212 : unsigned r_start, r_end;
213 : std::string errmsg, mm, nn;
214 546 : pdb.getResidueRange( chains[i], r_start, r_end, errmsg );
215 546 : if( !useter ) {
216 546 : std::string resname = pdb.getResidueName( r_start );
217 546 : if( MolDataClass::isTerminalGroup( mytype, resname ) ) {
218 546 : r_start++;
219 : }
220 546 : resname = pdb.getResidueName( r_end );
221 546 : if( MolDataClass::isTerminalGroup( mytype, resname ) ) {
222 546 : r_end--;
223 : }
224 : }
225 546 : Tools::convert(r_start,mm);
226 546 : Tools::convert(r_end,nn);
227 546 : if(i==0) {
228 72 : restrings[0] = mm + "-" + nn;
229 : } else {
230 1020 : restrings.push_back( mm + "-" + nn );
231 : }
232 : }
233 36 : }
234 : }
235 38 : Tools::interpretRanges(restrings);
236 :
237 : // Convert the list of involved residues into a list of segments of chains
238 : int nk, nj;
239 : std::vector< std::vector<unsigned> > segments;
240 : std::vector<unsigned> thissegment;
241 38 : Tools::convert(restrings[0],nk);
242 38 : thissegment.push_back(nk);
243 4404 : for(unsigned i=1; i<restrings.size(); ++i) {
244 4366 : Tools::convert(restrings[i-1],nk);
245 4366 : Tools::convert(restrings[i],nj);
246 12076 : if( (nk+1)!=nj || pdb.getChainID(nk)!=pdb.getChainID(nj) ) {
247 511 : segments.push_back(thissegment);
248 511 : thissegment.resize(0);
249 : }
250 4366 : thissegment.push_back(nj);
251 : }
252 38 : segments.push_back( thissegment );
253 :
254 : // And now get the backbone atoms from each segment
255 38 : backbone.resize( segments.size() );
256 : std::vector<AtomNumber> atomnumbers;
257 587 : for(unsigned i=0; i<segments.size(); ++i) {
258 4953 : for(unsigned j=0; j<segments[i].size(); ++j) {
259 4404 : std::string resname=pdb.getResidueName( segments[i][j] );
260 4404 : if( !MolDataClass::allowedResidue(mytype, resname) ) {
261 : std::string num;
262 0 : Tools::convert( segments[i][j], num );
263 0 : error("residue " + num + " is not recognized for moltype " + mytype );
264 : }
265 4404 : if( !useter && MolDataClass::isTerminalGroup( mytype, resname ) ) {
266 : std::string num;
267 0 : Tools::convert( segments[i][j], num );
268 0 : error("residue " + num + " appears to be a terminal group");
269 : }
270 4404 : if( resname=="GLY" ) {
271 0 : warning("GLY residues are achiral - assuming HA1 atom is in CB position");
272 : }
273 4404 : MolDataClass::getBackboneForResidue( mytype, segments[i][j], pdb, atomnumbers );
274 4404 : if( atomnumbers.size()==0 ) {
275 : std::string num;
276 0 : Tools::convert( segments[i][j], num );
277 0 : error("Could not find required backbone atom in residue number " + num );
278 : } else {
279 26424 : for(unsigned k=0; k<atomnumbers.size(); ++k) {
280 22020 : backbone[i].push_back( atomnumbers[k] );
281 : }
282 : }
283 4404 : atomnumbers.resize(0);
284 : }
285 : }
286 38 : }
287 38 : }
288 :
289 787 : void GenericMolInfo::interpretSymbol( const std::string& symbol, std::vector<AtomNumber>& atoms ) {
290 3094 : if(Tools::startWith(symbol,"mdt:") || Tools::startWith(symbol,"mda:") || Tools::startWith(symbol,"vmd:") || Tools::startWith(symbol,"vmdexec:")) {
291 :
292 24 : plumed_assert(enablePythonInterpreter);
293 :
294 48 : log<<" symbol " + symbol + " will be sent to python interpreter\n";
295 24 : if(!selector_running) {
296 3 : log<<" MOLINFO "<<getLabel()<<": starting python interpreter\n";
297 3 : if(comm.Get_rank()==0) {
298 4 : selector=Tools::make_unique<Subprocess>(pythonCmd+" \""+config::getPlumedRoot()+"\"/scripts/selector.sh --pdb " + reference);
299 2 : selector->stop();
300 : }
301 3 : selector_running=true;
302 : }
303 :
304 24 : atoms.resize(0);
305 :
306 24 : if(comm.Get_rank()==0) {
307 16 : int ok=0;
308 : std::string error_msg;
309 : // this is a complicated way to have the exception propagated with MPI.
310 : // It is necessary since only one process calls the selector.
311 : // Probably I should recycle the exception propagation at library boundaries
312 : // to allow transferring the exception to other processes.
313 : try {
314 16 : plumed_assert(selector) << "Python interpreter is disabled, selection " + symbol + " cannot be interpreted";
315 : auto h=selector->contStop(); // stops again when it goes out of scope
316 16 : (*selector) << symbol << "\n";
317 16 : selector->flush();
318 : std::string res;
319 : std::vector<std::string> words;
320 : while(true) {
321 16 : selector->getline(res);
322 32 : words=Tools::getWords(res);
323 32 : if(!words.empty() && words[0]=="Error") {
324 0 : plumed_error()<<res;
325 : }
326 32 : if(!words.empty() && words[0]=="Selection:") {
327 : break;
328 : }
329 : }
330 : words.erase(words.begin());
331 712 : for(const auto & w : words) {
332 : int n;
333 696 : if(w.empty()) {
334 0 : continue;
335 : }
336 696 : Tools::convert(w,n);
337 1392 : atoms.push_back(AtomNumber::serial(n));
338 : }
339 16 : ok=1;
340 32 : } catch (const Exception & e) {
341 0 : error_msg=e.what();
342 0 : }
343 16 : comm.Bcast(ok,0);
344 16 : if(!ok) {
345 0 : size_t s=error_msg.length();
346 0 : comm.Bcast(s,0);
347 0 : comm.Bcast(error_msg,0);
348 0 : throw Exception()<<error_msg;
349 : }
350 16 : size_t nat=atoms.size();
351 16 : comm.Bcast(nat,0);
352 16 : comm.Bcast(atoms,0);
353 : } else {
354 8 : int ok=0;
355 : std::string error_msg;
356 8 : comm.Bcast(ok,0);
357 8 : if(!ok) {
358 : size_t s;
359 0 : comm.Bcast(s,0);
360 0 : error_msg.resize(s);
361 0 : comm.Bcast(error_msg,0);
362 0 : throw Exception()<<error_msg;
363 : }
364 8 : size_t nat=0;
365 8 : comm.Bcast(nat,0);
366 8 : atoms.resize(nat);
367 8 : comm.Bcast(atoms,0);
368 : }
369 24 : log<<" selection interpreted using ";
370 48 : if(Tools::startWith(symbol,"mdt:")) {
371 12 : log<<"mdtraj "<<cite("McGibbon et al, Biophys. J., 109, 1528 (2015)")<<"\n";
372 : }
373 48 : if(Tools::startWith(symbol,"mda:")) {
374 36 : log<<"MDAnalysis "<<cite("Gowers et al, Proceedings of the 15th Python in Science Conference, doi:10.25080/majora-629e541a-00e (2016)")<<"\n";
375 : }
376 48 : if(Tools::startWith(symbol,"vmdexec:")) {
377 0 : log<<"VMD "<<cite("Humphrey, Dalke, and Schulten, K., J. Molec. Graphics, 14, 33 (1996)")<<"\n";
378 : }
379 48 : if(Tools::startWith(symbol,"vmd:")) {
380 0 : log<<"VMD (github.com/Eigenstate/vmd-python) "<<cite("Humphrey, Dalke, and Schulten, K., J. Molec. Graphics, 14, 33 (1996)")<<"\n";
381 : }
382 24 : return;
383 : }
384 763 : MolDataClass::specialSymbol( mytype, symbol, pdb, atoms );
385 763 : if(atoms.empty()) {
386 0 : error(symbol + " not found in your MOLINFO structure");
387 : }
388 : }
389 :
390 74117 : std::string GenericMolInfo::getAtomName(AtomNumber a)const {
391 74117 : return pdb.getAtomName(a);
392 : }
393 :
394 249 : bool GenericMolInfo::checkForAtom(AtomNumber a)const {
395 249 : return pdb.checkForAtom(a);
396 : }
397 :
398 61001 : unsigned GenericMolInfo::getResidueNumber(AtomNumber a)const {
399 61001 : return pdb.getResidueNumber(a);
400 : }
401 :
402 17337 : unsigned GenericMolInfo::getPDBsize()const {
403 17337 : return pdb.size();
404 : }
405 :
406 14949 : std::string GenericMolInfo::getResidueName(AtomNumber a)const {
407 14949 : return pdb.getResidueName(a);
408 : }
409 :
410 0 : std::string GenericMolInfo::getChainID(AtomNumber a)const {
411 0 : return pdb.getChainID(a);
412 : }
413 :
414 186 : Vector GenericMolInfo::getPosition(AtomNumber a)const {
415 186 : return pdb.getPosition(a);
416 : }
417 :
418 29189 : bool GenericMolInfo::isWhole() const {
419 29189 : return iswhole_;
420 : }
421 :
422 7442 : void GenericMolInfo::prepare() {
423 7442 : if(selector_running) {
424 3 : log<<" MOLINFO "<<getLabel()<<": killing python interpreter\n";
425 : selector.reset();
426 3 : selector_running=false;
427 : }
428 7442 : }
429 :
430 : }
|