Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2012-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "GenericMolInfo.h"
23 : #include "Atoms.h"
24 : #include "ActionRegister.h"
25 : #include "ActionSet.h"
26 : #include "PlumedMain.h"
27 : #include "tools/MolDataClass.h"
28 : #include "tools/PDB.h"
29 : #include "config/Config.h"
30 :
31 : namespace PLMD {
32 :
33 :
34 : /*
35 : This action is defined in core/ as it is used by other actions.
36 : Anyway, it is registered in setup/, so that excluding that module from
37 : compilation will exclude it from plumed.
38 : */
39 :
40 :
41 85 : void GenericMolInfo::registerKeywords( Keywords& keys ) {
42 85 : ActionSetup::registerKeywords(keys);
43 170 : keys.add("compulsory","STRUCTURE","a file in pdb format containing a reference structure. "
44 : "This is used to defines the atoms in the various residues, chains, etc . "
45 : "For more details on the PDB file format visit http://www.wwpdb.org/docs.html");
46 170 : keys.add("compulsory","MOLTYPE","protein","what kind of molecule is contained in the pdb file - usually not needed since protein/RNA/DNA are compatible");
47 170 : keys.add("compulsory","PYTHON_BIN","default","python interpreter");
48 170 : keys.add("atoms","CHAIN","(for masochists ( mostly Davide Branduardi ) ) The atoms involved in each of the chains of interest in the structure.");
49 170 : keys.add("hidden","STRIDE","frequency for resetting the python interpreter. Should be 1.");
50 170 : keys.addFlag("WHOLE", false, "The reference structure is whole, i.e. not broken by PBC");
51 85 : }
52 :
53 168 : GenericMolInfo::~GenericMolInfo() {
54 : // empty destructor to delete unique_ptr
55 336 : }
56 :
57 84 : GenericMolInfo::GenericMolInfo( const ActionOptions&ao ):
58 : Action(ao),
59 : ActionAnyorder(ao),
60 : ActionPilot(ao),
61 : ActionAtomistic(ao),
62 84 : iswhole_(false)
63 : {
64 84 : plumed_assert(getStride()==1);
65 : // Read what is contained in the pdb file
66 84 : parse("MOLTYPE",mytype);
67 :
68 : // check if whole
69 84 : parseFlag("WHOLE", iswhole_);
70 :
71 84 : auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
72 84 : if( moldat ) log<<" overriding last MOLINFO with label " << moldat->getLabel()<<"\n";
73 :
74 : std::vector<AtomNumber> backbone;
75 168 : parseAtomList("CHAIN",backbone);
76 84 : if( read_backbone.size()==0 ) {
77 0 : for(unsigned i=1;; ++i) {
78 168 : parseAtomList("CHAIN",i,backbone);
79 84 : if( backbone.size()==0 ) break;
80 0 : read_backbone.push_back(backbone);
81 0 : backbone.resize(0);
82 : }
83 : } else {
84 0 : read_backbone.push_back(backbone);
85 : }
86 84 : if( read_backbone.size()==0 ) {
87 84 : parse("STRUCTURE",reference);
88 :
89 168 : if( ! pdb.read(reference,plumed.getAtoms().usingNaturalUnits(),0.1/plumed.getAtoms().getUnits().getLength()))plumed_merror("missing input file " + reference );
90 :
91 84 : std::vector<std::string> chains; pdb.getChainNames( chains );
92 84 : log.printf(" pdb file named %s contains %u chains \n",reference.c_str(), static_cast<unsigned>(chains.size()));
93 242 : for(unsigned i=0; i<chains.size(); ++i) {
94 : unsigned start,end; std::string errmsg;
95 158 : pdb.getResidueRange( chains[i], start, end, errmsg );
96 158 : if( errmsg.length()!=0 ) error( errmsg );
97 : AtomNumber astart,aend;
98 158 : pdb.getAtomRange( chains[i], astart, aend, errmsg );
99 158 : if( errmsg.length()!=0 ) error( errmsg );
100 158 : log.printf(" chain named %s contains residues %u to %u and atoms %u to %u \n",chains[i].c_str(),start,end,astart.serial(),aend.serial());
101 : }
102 :
103 : std::string python_bin;
104 168 : parse("PYTHON_BIN",python_bin);
105 84 : if(python_bin=="no") {
106 0 : log<<" python interpreter disabled\n";
107 : } else {
108 168 : pythonCmd=config::getEnvCommand();
109 84 : if(python_bin!="default") {
110 0 : log<<" forcing python interpreter: "<<python_bin<<"\n";
111 0 : pythonCmd+=" env PLUMED_PYTHON_BIN="+python_bin;
112 : }
113 : bool sorted=true;
114 84 : const auto & at=pdb.getAtomNumbers();
115 89826 : for(unsigned i=0; i<at.size(); i++) {
116 89742 : if(at[i].index()!=i) sorted=false;
117 : }
118 84 : if(!sorted) {
119 1 : log<<" PDB is not sorted, python interpreter will be disabled\n";
120 83 : } else if(!Subprocess::available()) {
121 0 : log<<" subprocess is not available, python interpreter will be disabled\n";
122 : } else {
123 83 : enablePythonInterpreter=true;
124 : }
125 : }
126 84 : }
127 84 : }
128 :
129 25 : void GenericMolInfo::getBackbone( std::vector<std::string>& restrings, const std::string& fortype, std::vector< std::vector<AtomNumber> >& backbone ) {
130 25 : if( fortype!=mytype ) error("cannot calculate a variable designed for " + fortype + " molecules for molecule type " + mytype );
131 25 : if( MolDataClass::numberOfAtomsPerResidueInBackbone( mytype )==0 ) error("backbone is not defined for molecule type " + mytype );
132 :
133 25 : if( read_backbone.size()!=0 ) {
134 0 : if( restrings.size()!=1 ) error("cannot interpret anything other than all for residues when using CHAIN keywords");
135 0 : if( restrings[0]!="all" ) error("cannot interpret anything other than all for residues when using CHAIN keywords");
136 0 : backbone.resize( read_backbone.size() );
137 0 : for(unsigned i=0; i<read_backbone.size(); ++i) {
138 0 : backbone[i].resize( read_backbone[i].size() );
139 0 : for(unsigned j=0; j<read_backbone[i].size(); ++j) backbone[i][j]=read_backbone[i][j];
140 : }
141 : } else {
142 : bool useter=false; // This is used to deal with terminal groups in WHOLEMOLECULES
143 25 : if( restrings.size()==1 ) {
144 23 : useter=( restrings[0].find("ter")!=std::string::npos );
145 23 : if( restrings[0].find("all")!=std::string::npos ) {
146 21 : std::vector<std::string> chains; pdb.getChainNames( chains );
147 348 : for(unsigned i=0; i<chains.size(); ++i) {
148 : unsigned r_start, r_end; std::string errmsg, mm, nn;
149 327 : pdb.getResidueRange( chains[i], r_start, r_end, errmsg );
150 327 : if( !useter ) {
151 327 : std::string resname = pdb.getResidueName( r_start );
152 327 : if( MolDataClass::isTerminalGroup( mytype, resname ) ) r_start++;
153 327 : resname = pdb.getResidueName( r_end );
154 327 : if( MolDataClass::isTerminalGroup( mytype, resname ) ) r_end--;
155 : }
156 327 : Tools::convert(r_start,mm); Tools::convert(r_end,nn);
157 348 : if(i==0) restrings[0] = mm + "-" + nn;
158 612 : else restrings.push_back( mm + "-" + nn );
159 : }
160 21 : }
161 : }
162 25 : Tools::interpretRanges(restrings);
163 :
164 : // Convert the list of involved residues into a list of segments of chains
165 : int nk, nj; std::vector< std::vector<unsigned> > segments;
166 : std::vector<unsigned> thissegment;
167 25 : Tools::convert(restrings[0],nk); thissegment.push_back(nk);
168 2652 : for(unsigned i=1; i<restrings.size(); ++i) {
169 2627 : Tools::convert(restrings[i-1],nk);
170 2627 : Tools::convert(restrings[i],nj);
171 7265 : if( (nk+1)!=nj || pdb.getChainID(nk)!=pdb.getChainID(nj) ) {
172 308 : segments.push_back(thissegment);
173 308 : thissegment.resize(0);
174 : }
175 2627 : thissegment.push_back(nj);
176 : }
177 25 : segments.push_back( thissegment );
178 :
179 : // And now get the backbone atoms from each segment
180 25 : backbone.resize( segments.size() );
181 : std::vector<AtomNumber> atomnumbers;
182 358 : for(unsigned i=0; i<segments.size(); ++i) {
183 2985 : for(unsigned j=0; j<segments[i].size(); ++j) {
184 2652 : std::string resname=pdb.getResidueName( segments[i][j] );
185 2652 : if( !MolDataClass::allowedResidue(mytype, resname) ) {
186 0 : std::string num; Tools::convert( segments[i][j], num );
187 0 : error("residue " + num + " is not recognized for moltype " + mytype );
188 : }
189 2652 : if( !useter && MolDataClass::isTerminalGroup( mytype, resname ) ) {
190 0 : std::string num; Tools::convert( segments[i][j], num );
191 0 : error("residue " + num + " appears to be a terminal group");
192 : }
193 2652 : if( resname=="GLY" ) warning("GLY residues are achiral - assuming HA1 atom is in CB position");
194 2652 : MolDataClass::getBackboneForResidue( mytype, segments[i][j], pdb, atomnumbers );
195 2652 : if( atomnumbers.size()==0 ) {
196 0 : std::string num; Tools::convert( segments[i][j], num );
197 0 : error("Could not find required backbone atom in residue number " + num );
198 : } else {
199 15912 : for(unsigned k=0; k<atomnumbers.size(); ++k) backbone[i].push_back( atomnumbers[k] );
200 : }
201 2652 : atomnumbers.resize(0);
202 : }
203 : }
204 25 : }
205 25 : }
206 :
207 617 : void GenericMolInfo::interpretSymbol( const std::string& symbol, std::vector<AtomNumber>& atoms ) {
208 2414 : if(Tools::startWith(symbol,"mdt:") || Tools::startWith(symbol,"mda:") || Tools::startWith(symbol,"vmd:") || Tools::startWith(symbol,"vmdexec:")) {
209 :
210 24 : plumed_assert(enablePythonInterpreter);
211 :
212 48 : log<<" symbol " + symbol + " will be sent to python interpreter\n";
213 24 : if(!selector_running) {
214 3 : log<<" MOLINFO "<<getLabel()<<": starting python interpreter\n";
215 3 : if(comm.Get_rank()==0) {
216 8 : selector=Tools::make_unique<Subprocess>(pythonCmd+" \""+config::getPlumedRoot()+"\"/scripts/selector.sh --pdb " + reference);
217 2 : selector->stop();
218 : }
219 3 : selector_running=true;
220 : }
221 :
222 24 : atoms.resize(0);
223 :
224 24 : if(comm.Get_rank()==0) {
225 16 : int ok=0;
226 : std::string error_msg;
227 : // this is a complicated way to have the exception propagated with MPI.
228 : // It is necessary since only one process calls the selector.
229 : // Probably I should recycle the exception propagation at library boundaries
230 : // to allow transferring the exception to other processes.
231 : try {
232 16 : plumed_assert(selector) << "Python interpreter is disabled, selection " + symbol + " cannot be interpreted";
233 : auto h=selector->contStop(); // stops again when it goes out of scope
234 16 : (*selector) << symbol << "\n";
235 16 : selector->flush();
236 : std::string res;
237 : std::vector<std::string> words;
238 : while(true) {
239 16 : selector->getline(res);
240 32 : words=Tools::getWords(res);
241 32 : if(!words.empty() && words[0]=="Error") plumed_error()<<res;
242 32 : if(!words.empty() && words[0]=="Selection:") break;
243 : }
244 : words.erase(words.begin());
245 712 : for(auto & w : words) {
246 : int n;
247 696 : if(w.empty()) continue;
248 696 : Tools::convert(w,n);
249 1392 : atoms.push_back(AtomNumber::serial(n));
250 : }
251 16 : ok=1;
252 32 : } catch (const Exception & e) {
253 0 : error_msg=e.what();
254 0 : }
255 16 : comm.Bcast(ok,0);
256 16 : if(!ok) {
257 0 : size_t s=error_msg.length();
258 0 : comm.Bcast(s,0);
259 0 : comm.Bcast(error_msg,0);
260 0 : throw Exception()<<error_msg;
261 : }
262 16 : size_t nat=atoms.size();
263 16 : comm.Bcast(nat,0);
264 16 : comm.Bcast(atoms,0);
265 : } else {
266 8 : int ok=0;
267 : std::string error_msg;
268 8 : comm.Bcast(ok,0);
269 8 : if(!ok) {
270 : size_t s;
271 0 : comm.Bcast(s,0);
272 0 : error_msg.resize(s);
273 0 : comm.Bcast(error_msg,0);
274 0 : throw Exception()<<error_msg;
275 : }
276 8 : size_t nat=0;
277 8 : comm.Bcast(nat,0);
278 8 : atoms.resize(nat);
279 8 : comm.Bcast(atoms,0);
280 : }
281 24 : log<<" selection interpreted using ";
282 60 : if(Tools::startWith(symbol,"mdt:")) log<<"mdtraj "<<cite("McGibbon et al, Biophys. J., 109, 1528 (2015)")<<"\n";
283 84 : if(Tools::startWith(symbol,"mda:")) log<<"MDAnalysis "<<cite("Gowers et al, Proceedings of the 15th Python in Science Conference, doi:10.25080/majora-629e541a-00e (2016)")<<"\n";
284 48 : if(Tools::startWith(symbol,"vmdexec:")) log<<"VMD "<<cite("Humphrey, Dalke, and Schulten, K., J. Molec. Graphics, 14, 33 (1996)")<<"\n";
285 48 : if(Tools::startWith(symbol,"vmd:")) log<<"VMD (github.com/Eigenstate/vmd-python) "<<cite("Humphrey, Dalke, and Schulten, K., J. Molec. Graphics, 14, 33 (1996)")<<"\n";
286 24 : return;
287 : }
288 593 : MolDataClass::specialSymbol( mytype, symbol, pdb, atoms );
289 593 : if(atoms.empty()) error(symbol + " not found in your MOLINFO structure");
290 : }
291 :
292 32447 : std::string GenericMolInfo::getAtomName(AtomNumber a)const {
293 32447 : return pdb.getAtomName(a);
294 : }
295 :
296 4 : bool GenericMolInfo::checkForAtom(AtomNumber a)const {
297 4 : return pdb.checkForAtom(a);
298 : }
299 :
300 60821 : unsigned GenericMolInfo::getResidueNumber(AtomNumber a)const {
301 60821 : return pdb.getResidueNumber(a);
302 : }
303 :
304 16599 : unsigned GenericMolInfo::getPDBsize()const {
305 16599 : return pdb.size();
306 : }
307 :
308 14769 : std::string GenericMolInfo::getResidueName(AtomNumber a)const {
309 14769 : return pdb.getResidueName(a);
310 : }
311 :
312 0 : std::string GenericMolInfo::getChainID(AtomNumber a)const {
313 0 : return pdb.getChainID(a);
314 : }
315 :
316 2 : Vector GenericMolInfo::getPosition(AtomNumber a)const {
317 2 : return pdb.getPosition(a);
318 : }
319 :
320 0 : bool GenericMolInfo::isWhole() const {
321 0 : return iswhole_;
322 : }
323 :
324 5121 : void GenericMolInfo::prepare() {
325 5121 : if(selector_running) {
326 3 : log<<" MOLINFO "<<getLabel()<<": killing python interpreter\n";
327 : selector.reset();
328 3 : selector_running=false;
329 : }
330 5121 : }
331 :
332 : }
|