Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2012-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "GenericMolInfo.h"
23 : #include "ActionRegister.h"
24 : #include "ActionSet.h"
25 : #include "PlumedMain.h"
26 : #include "tools/MolDataClass.h"
27 : #include "tools/PDB.h"
28 : #include "tools/Communicator.h"
29 : #include "config/Config.h"
30 :
31 : namespace PLMD {
32 :
33 :
34 : /*
35 : This action is defined in core/ as it is used by other actions.
36 : Anyway, it is registered in setup/, so that excluding that module from
37 : compilation will exclude it from plumed.
38 : */
39 :
40 :
41 100 : void GenericMolInfo::registerKeywords( Keywords& keys ) {
42 100 : ActionSetup::registerKeywords(keys);
43 200 : keys.add("compulsory","STRUCTURE","a file in pdb format containing a reference structure. "
44 : "This is used to defines the atoms in the various residues, chains, etc . "
45 : "For more details on the PDB file format visit http://www.wwpdb.org/docs.html");
46 200 : keys.add("compulsory","MOLTYPE","protein","what kind of molecule is contained in the pdb file - usually not needed since protein/RNA/DNA are compatible");
47 200 : keys.add("compulsory","PYTHON_BIN","default","python interpreter");
48 200 : keys.add("atoms","CHAIN","(for masochists ( mostly Davide Branduardi ) ) The atoms involved in each of the chains of interest in the structure.");
49 200 : keys.add("hidden","STRIDE","frequency for resetting the python interpreter. Should be 1.");
50 200 : keys.addFlag("WHOLE", false, "The reference structure is whole, i.e. not broken by PBC");
51 100 : }
52 :
53 196 : GenericMolInfo::~GenericMolInfo() {
54 : // empty destructor to delete unique_ptr
55 294 : }
56 :
57 98 : GenericMolInfo::GenericMolInfo( const ActionOptions&ao ):
58 : Action(ao),
59 : ActionAnyorder(ao),
60 : ActionPilot(ao),
61 : ActionAtomistic(ao),
62 98 : iswhole_(false)
63 : {
64 98 : plumed_assert(getStride()==1);
65 : // Read what is contained in the pdb file
66 98 : parse("MOLTYPE",mytype);
67 :
68 : // check if whole
69 98 : parseFlag("WHOLE", iswhole_);
70 :
71 98 : auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
72 98 : if( moldat ) log<<" overriding last MOLINFO with label " << moldat->getLabel()<<"\n";
73 :
74 : std::vector<AtomNumber> backbone;
75 196 : parseAtomList("CHAIN",backbone);
76 98 : if( read_backbone.size()==0 ) {
77 0 : for(unsigned i=1;; ++i) {
78 196 : parseAtomList("CHAIN",i,backbone);
79 98 : if( backbone.size()==0 ) break;
80 0 : read_backbone.push_back(backbone);
81 0 : backbone.resize(0);
82 : }
83 : } else {
84 0 : read_backbone.push_back(backbone);
85 : }
86 98 : if( read_backbone.size()==0 ) {
87 98 : parse("STRUCTURE",reference);
88 :
89 98 : if( ! pdb.read(reference,usingNaturalUnits(),0.1/getUnits().getLength()))plumed_merror("missing input file " + reference );
90 :
91 98 : std::vector<std::string> chains; pdb.getChainNames( chains );
92 98 : log.printf(" pdb file named %s contains %u chains \n",reference.c_str(), static_cast<unsigned>(chains.size()));
93 289 : for(unsigned i=0; i<chains.size(); ++i) {
94 : unsigned start,end; std::string errmsg;
95 191 : pdb.getResidueRange( chains[i], start, end, errmsg );
96 191 : if( errmsg.length()!=0 ) error( errmsg );
97 : AtomNumber astart,aend;
98 191 : pdb.getAtomRange( chains[i], astart, aend, errmsg );
99 191 : if( errmsg.length()!=0 ) error( errmsg );
100 191 : log.printf(" chain named %s contains residues %u to %u and atoms %u to %u \n",chains[i].c_str(),start,end,astart.serial(),aend.serial());
101 : }
102 :
103 : std::string python_bin;
104 196 : parse("PYTHON_BIN",python_bin);
105 98 : if(python_bin=="no") {
106 0 : log<<" python interpreter disabled\n";
107 : } else {
108 196 : pythonCmd=config::getEnvCommand();
109 98 : if(python_bin!="default") {
110 0 : log<<" forcing python interpreter: "<<python_bin<<"\n";
111 0 : pythonCmd+=" env PLUMED_PYTHON_BIN="+python_bin;
112 : }
113 : bool sorted=true;
114 98 : const auto & at=pdb.getAtomNumbers();
115 102460 : for(unsigned i=0; i<at.size(); i++) {
116 102362 : if(at[i].index()!=i) sorted=false;
117 : }
118 98 : if(!sorted) {
119 2 : log<<" PDB is not sorted, python interpreter will be disabled\n";
120 96 : } else if(!Subprocess::available()) {
121 0 : log<<" subprocess is not available, python interpreter will be disabled\n";
122 : } else {
123 96 : enablePythonInterpreter=true;
124 : }
125 : }
126 98 : }
127 98 : }
128 :
129 1 : std::map<std::string,std::string> GenericMolInfo::getSpecialKeywords() {
130 : std::map<std::string,std::string> mapkeys;
131 1 : mapkeys.insert( std::pair<std::string,std::string>("@mda:","atom selection built using syntax for <a href=\\\"https://docs.mdanalysis.org/stable/documentation_pages/selections.html\\\">MDAnalysis</a>") );
132 1 : mapkeys.insert( std::pair<std::string,std::string>("@mdt:","atom selection built using syntax for <a href=\\\"https://www.mdtraj.org/1.9.8.dev0/index.html\\\">mdtraj</a>") );
133 1 : mapkeys.insert( std::pair<std::string,std::string>("@vmdexec:","atom selection built using syntax for <a href=\\\"https://www.ks.uiuc.edu/Research/vmd/\\\">VMD</a>") );
134 1 : mapkeys.insert( std::pair<std::string,std::string>("@vmd:","atom selection built using syntax for <a href=\\\"https://www.ks.uiuc.edu/Research/vmd/\\\">VMD</a> python module") );
135 1 : mapkeys.insert( std::pair<std::string,std::string>("@nucleic","all atoms that are part of a DNA or RNA molecule") );
136 1 : mapkeys.insert( std::pair<std::string,std::string>("@protein","all atoms that are part of a protein") );
137 1 : mapkeys.insert( std::pair<std::string,std::string>("@water","all water molecules") );
138 1 : mapkeys.insert( std::pair<std::string,std::string>("@ions","all the ions") );
139 1 : mapkeys.insert( std::pair<std::string,std::string>("@hydrogens","all hydrogen atoms") );
140 1 : mapkeys.insert( std::pair<std::string,std::string>("@nonhydrogens","all non hydrogen atoms") );
141 1 : mapkeys.insert( std::pair<std::string,std::string>("@phi-","the four atoms that are required to calculate the phi dihedral for") );
142 1 : mapkeys.insert( std::pair<std::string,std::string>("@psi-","the four atoms that are required to calculate the psi dihedral for") );
143 1 : mapkeys.insert( std::pair<std::string,std::string>("@omega-","the four atoms that are required to calculate the omega dihedral for") );
144 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi1-","the four atoms that are required to calculate the chi1 dihedral for") );
145 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi2-","the four atoms that are required to calculate the chi2 dihedral for") );
146 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi3-","the four atoms that are required to calculate the chi3 dihedral for") );
147 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi4-","the four atoms that are required to calculate the chi4 dihedral for") );
148 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi5-","the four atoms that are required to calculate the chi5 dihedral for") );
149 1 : mapkeys.insert( std::pair<std::string,std::string>("@sidechain-","the protein sidechain atoms in") );
150 1 : mapkeys.insert( std::pair<std::string,std::string>("@back-","the protein/dna/rna backbone atoms in") );
151 1 : mapkeys.insert( std::pair<std::string,std::string>("@alpha-","the four atoms that are required to calculate the alpha backbone dihedral for") );
152 1 : mapkeys.insert( std::pair<std::string,std::string>("@beta-","the four atoms that are required to calculate the beta backbone dihedral for") );
153 1 : mapkeys.insert( std::pair<std::string,std::string>("@gamma-","the four atoms that are required to calculate the gamma backbone dihedral for") );
154 1 : mapkeys.insert( std::pair<std::string,std::string>("@delta-","the four atoms that are required to calculate the delta backbone dihedral for") );
155 1 : mapkeys.insert( std::pair<std::string,std::string>("@epsilon-","the four atoms that are required to calculate the backbone epsilon dihedral for") );
156 1 : mapkeys.insert( std::pair<std::string,std::string>("@zeta-","the four atoms that are required to calculate the zeta backbone dihedral for") );
157 1 : mapkeys.insert( std::pair<std::string,std::string>("@v0-","the four atoms that are required to calculate the v0 sugar dihedral for") );
158 1 : mapkeys.insert( std::pair<std::string,std::string>("@v1-","the four atoms that are required to calculate the v1 sugar dihedral for") );
159 1 : mapkeys.insert( std::pair<std::string,std::string>("@v2-","the four atoms that are required to calculate the v2 sugar dihedral for") );
160 1 : mapkeys.insert( std::pair<std::string,std::string>("@v3-","the four atoms that are required to calculate the v3 sugar dihedral for") );
161 1 : mapkeys.insert( std::pair<std::string,std::string>("@v4-","the four atoms that are required to calculate the v4 sugar dihedral for") );
162 1 : mapkeys.insert( std::pair<std::string,std::string>("@chi-","the four atoms that are required to alcullate the chi dihedral for") );
163 1 : mapkeys.insert( std::pair<std::string,std::string>("@sugar-","the heavy atoms of the sugar in") );
164 1 : mapkeys.insert( std::pair<std::string,std::string>("@base-","the heavy atoms of the base in") );
165 1 : mapkeys.insert( std::pair<std::string,std::string>("@lcs-","an ordered triplet of atoms on the 6-membered ring of the nucleobase in") );
166 1 : return mapkeys;
167 : }
168 :
169 32 : void GenericMolInfo::getBackbone( std::vector<std::string>& restrings, const std::string& fortype, std::vector< std::vector<AtomNumber> >& backbone ) {
170 32 : if( fortype!=mytype ) error("cannot calculate a variable designed for " + fortype + " molecules for molecule type " + mytype );
171 32 : if( MolDataClass::numberOfAtomsPerResidueInBackbone( mytype )==0 ) error("backbone is not defined for molecule type " + mytype );
172 :
173 32 : if( read_backbone.size()!=0 ) {
174 0 : if( restrings.size()!=1 ) error("cannot interpret anything other than all for residues when using CHAIN keywords");
175 0 : if( restrings[0]!="all" ) error("cannot interpret anything other than all for residues when using CHAIN keywords");
176 0 : backbone.resize( read_backbone.size() );
177 0 : for(unsigned i=0; i<read_backbone.size(); ++i) {
178 0 : backbone[i].resize( read_backbone[i].size() );
179 0 : for(unsigned j=0; j<read_backbone[i].size(); ++j) backbone[i][j]=read_backbone[i][j];
180 : }
181 : } else {
182 : bool useter=false; // This is used to deal with terminal groups in WHOLEMOLECULES
183 32 : if( restrings.size()==1 ) {
184 31 : useter=( restrings[0].find("ter")!=std::string::npos );
185 31 : if( restrings[0].find("all")!=std::string::npos ) {
186 30 : std::vector<std::string> chains; pdb.getChainNames( chains );
187 468 : for(unsigned i=0; i<chains.size(); ++i) {
188 : unsigned r_start, r_end; std::string errmsg, mm, nn;
189 438 : pdb.getResidueRange( chains[i], r_start, r_end, errmsg );
190 438 : if( !useter ) {
191 438 : std::string resname = pdb.getResidueName( r_start );
192 438 : if( MolDataClass::isTerminalGroup( mytype, resname ) ) r_start++;
193 438 : resname = pdb.getResidueName( r_end );
194 438 : if( MolDataClass::isTerminalGroup( mytype, resname ) ) r_end--;
195 : }
196 438 : Tools::convert(r_start,mm); Tools::convert(r_end,nn);
197 468 : if(i==0) restrings[0] = mm + "-" + nn;
198 816 : else restrings.push_back( mm + "-" + nn );
199 : }
200 30 : }
201 : }
202 32 : Tools::interpretRanges(restrings);
203 :
204 : // Convert the list of involved residues into a list of segments of chains
205 : int nk, nj; std::vector< std::vector<unsigned> > segments;
206 : std::vector<unsigned> thissegment;
207 32 : Tools::convert(restrings[0],nk); thissegment.push_back(nk);
208 3540 : for(unsigned i=1; i<restrings.size(); ++i) {
209 3508 : Tools::convert(restrings[i-1],nk);
210 3508 : Tools::convert(restrings[i],nj);
211 9706 : if( (nk+1)!=nj || pdb.getChainID(nk)!=pdb.getChainID(nj) ) {
212 409 : segments.push_back(thissegment);
213 409 : thissegment.resize(0);
214 : }
215 3508 : thissegment.push_back(nj);
216 : }
217 32 : segments.push_back( thissegment );
218 :
219 : // And now get the backbone atoms from each segment
220 32 : backbone.resize( segments.size() );
221 : std::vector<AtomNumber> atomnumbers;
222 473 : for(unsigned i=0; i<segments.size(); ++i) {
223 3981 : for(unsigned j=0; j<segments[i].size(); ++j) {
224 3540 : std::string resname=pdb.getResidueName( segments[i][j] );
225 3540 : if( !MolDataClass::allowedResidue(mytype, resname) ) {
226 0 : std::string num; Tools::convert( segments[i][j], num );
227 0 : error("residue " + num + " is not recognized for moltype " + mytype );
228 : }
229 3540 : if( !useter && MolDataClass::isTerminalGroup( mytype, resname ) ) {
230 0 : std::string num; Tools::convert( segments[i][j], num );
231 0 : error("residue " + num + " appears to be a terminal group");
232 : }
233 3540 : if( resname=="GLY" ) warning("GLY residues are achiral - assuming HA1 atom is in CB position");
234 3540 : MolDataClass::getBackboneForResidue( mytype, segments[i][j], pdb, atomnumbers );
235 3540 : if( atomnumbers.size()==0 ) {
236 0 : std::string num; Tools::convert( segments[i][j], num );
237 0 : error("Could not find required backbone atom in residue number " + num );
238 : } else {
239 21240 : for(unsigned k=0; k<atomnumbers.size(); ++k) backbone[i].push_back( atomnumbers[k] );
240 : }
241 3540 : atomnumbers.resize(0);
242 : }
243 : }
244 32 : }
245 32 : }
246 :
247 661 : void GenericMolInfo::interpretSymbol( const std::string& symbol, std::vector<AtomNumber>& atoms ) {
248 2590 : if(Tools::startWith(symbol,"mdt:") || Tools::startWith(symbol,"mda:") || Tools::startWith(symbol,"vmd:") || Tools::startWith(symbol,"vmdexec:")) {
249 :
250 24 : plumed_assert(enablePythonInterpreter);
251 :
252 48 : log<<" symbol " + symbol + " will be sent to python interpreter\n";
253 24 : if(!selector_running) {
254 3 : log<<" MOLINFO "<<getLabel()<<": starting python interpreter\n";
255 3 : if(comm.Get_rank()==0) {
256 4 : selector=Tools::make_unique<Subprocess>(pythonCmd+" \""+config::getPlumedRoot()+"\"/scripts/selector.sh --pdb " + reference);
257 2 : selector->stop();
258 : }
259 3 : selector_running=true;
260 : }
261 :
262 24 : atoms.resize(0);
263 :
264 24 : if(comm.Get_rank()==0) {
265 16 : int ok=0;
266 : std::string error_msg;
267 : // this is a complicated way to have the exception propagated with MPI.
268 : // It is necessary since only one process calls the selector.
269 : // Probably I should recycle the exception propagation at library boundaries
270 : // to allow transferring the exception to other processes.
271 : try {
272 16 : plumed_assert(selector) << "Python interpreter is disabled, selection " + symbol + " cannot be interpreted";
273 : auto h=selector->contStop(); // stops again when it goes out of scope
274 16 : (*selector) << symbol << "\n";
275 16 : selector->flush();
276 : std::string res;
277 : std::vector<std::string> words;
278 : while(true) {
279 16 : selector->getline(res);
280 32 : words=Tools::getWords(res);
281 32 : if(!words.empty() && words[0]=="Error") plumed_error()<<res;
282 32 : if(!words.empty() && words[0]=="Selection:") break;
283 : }
284 : words.erase(words.begin());
285 712 : for(const auto & w : words) {
286 : int n;
287 696 : if(w.empty()) continue;
288 696 : Tools::convert(w,n);
289 1392 : atoms.push_back(AtomNumber::serial(n));
290 : }
291 16 : ok=1;
292 32 : } catch (const Exception & e) {
293 0 : error_msg=e.what();
294 0 : }
295 16 : comm.Bcast(ok,0);
296 16 : if(!ok) {
297 0 : size_t s=error_msg.length();
298 0 : comm.Bcast(s,0);
299 0 : comm.Bcast(error_msg,0);
300 0 : throw Exception()<<error_msg;
301 : }
302 16 : size_t nat=atoms.size();
303 16 : comm.Bcast(nat,0);
304 16 : comm.Bcast(atoms,0);
305 : } else {
306 8 : int ok=0;
307 : std::string error_msg;
308 8 : comm.Bcast(ok,0);
309 8 : if(!ok) {
310 : size_t s;
311 0 : comm.Bcast(s,0);
312 0 : error_msg.resize(s);
313 0 : comm.Bcast(error_msg,0);
314 0 : throw Exception()<<error_msg;
315 : }
316 8 : size_t nat=0;
317 8 : comm.Bcast(nat,0);
318 8 : atoms.resize(nat);
319 8 : comm.Bcast(atoms,0);
320 : }
321 24 : log<<" selection interpreted using ";
322 54 : if(Tools::startWith(symbol,"mdt:")) log<<"mdtraj "<<cite("McGibbon et al, Biophys. J., 109, 1528 (2015)")<<"\n";
323 66 : if(Tools::startWith(symbol,"mda:")) log<<"MDAnalysis "<<cite("Gowers et al, Proceedings of the 15th Python in Science Conference, doi:10.25080/majora-629e541a-00e (2016)")<<"\n";
324 48 : if(Tools::startWith(symbol,"vmdexec:")) log<<"VMD "<<cite("Humphrey, Dalke, and Schulten, K., J. Molec. Graphics, 14, 33 (1996)")<<"\n";
325 48 : if(Tools::startWith(symbol,"vmd:")) log<<"VMD (github.com/Eigenstate/vmd-python) "<<cite("Humphrey, Dalke, and Schulten, K., J. Molec. Graphics, 14, 33 (1996)")<<"\n";
326 24 : return;
327 : }
328 637 : MolDataClass::specialSymbol( mytype, symbol, pdb, atoms );
329 637 : if(atoms.empty()) error(symbol + " not found in your MOLINFO structure");
330 : }
331 :
332 68097 : std::string GenericMolInfo::getAtomName(AtomNumber a)const {
333 68097 : return pdb.getAtomName(a);
334 : }
335 :
336 249 : bool GenericMolInfo::checkForAtom(AtomNumber a)const {
337 249 : return pdb.checkForAtom(a);
338 : }
339 :
340 61001 : unsigned GenericMolInfo::getResidueNumber(AtomNumber a)const {
341 61001 : return pdb.getResidueNumber(a);
342 : }
343 :
344 17337 : unsigned GenericMolInfo::getPDBsize()const {
345 17337 : return pdb.size();
346 : }
347 :
348 14949 : std::string GenericMolInfo::getResidueName(AtomNumber a)const {
349 14949 : return pdb.getResidueName(a);
350 : }
351 :
352 0 : std::string GenericMolInfo::getChainID(AtomNumber a)const {
353 0 : return pdb.getChainID(a);
354 : }
355 :
356 186 : Vector GenericMolInfo::getPosition(AtomNumber a)const {
357 186 : return pdb.getPosition(a);
358 : }
359 :
360 2 : bool GenericMolInfo::isWhole() const {
361 2 : return iswhole_;
362 : }
363 :
364 5357 : void GenericMolInfo::prepare() {
365 5357 : if(selector_running) {
366 3 : log<<" MOLINFO "<<getLabel()<<": killing python interpreter\n";
367 : selector.reset();
368 3 : selector_running=false;
369 : }
370 5357 : }
371 :
372 : }
|