LCOV - code coverage report
Current view: top level - core - GenericMolInfo.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 190 222 85.6 %
Date: 2024-10-18 13:59:31 Functions: 15 18 83.3 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2012-2023 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : #include "GenericMolInfo.h"
      23             : #include "ActionRegister.h"
      24             : #include "ActionSet.h"
      25             : #include "PlumedMain.h"
      26             : #include "tools/MolDataClass.h"
      27             : #include "tools/PDB.h"
      28             : #include "tools/Communicator.h"
      29             : #include "config/Config.h"
      30             : 
      31             : namespace PLMD {
      32             : 
      33             : 
      34             : /*
      35             : This action is defined in core/ as it is used by other actions.
      36             : Anyway, it is registered in setup/, so that excluding that module from
      37             : compilation will exclude it from plumed.
      38             : */
      39             : 
      40             : 
      41         100 : void GenericMolInfo::registerKeywords( Keywords& keys ) {
      42         100 :   ActionSetup::registerKeywords(keys);
      43         200 :   keys.add("compulsory","STRUCTURE","a file in pdb format containing a reference structure. "
      44             :            "This is used to defines the atoms in the various residues, chains, etc . "
      45             :            "For more details on the PDB file format visit http://www.wwpdb.org/docs.html");
      46         200 :   keys.add("compulsory","MOLTYPE","protein","what kind of molecule is contained in the pdb file - usually not needed since protein/RNA/DNA are compatible");
      47         200 :   keys.add("compulsory","PYTHON_BIN","default","python interpreter");
      48         200 :   keys.add("atoms","CHAIN","(for masochists ( mostly Davide Branduardi ) ) The atoms involved in each of the chains of interest in the structure.");
      49         200 :   keys.add("hidden","STRIDE","frequency for resetting the python interpreter. Should be 1.");
      50         200 :   keys.addFlag("WHOLE", false, "The reference structure is whole, i.e. not broken by PBC");
      51         100 : }
      52             : 
      53         196 : GenericMolInfo::~GenericMolInfo() {
      54             : // empty destructor to delete unique_ptr
      55         294 : }
      56             : 
      57          98 : GenericMolInfo::GenericMolInfo( const ActionOptions&ao ):
      58             :   Action(ao),
      59             :   ActionAnyorder(ao),
      60             :   ActionPilot(ao),
      61             :   ActionAtomistic(ao),
      62          98 :   iswhole_(false)
      63             : {
      64          98 :   plumed_assert(getStride()==1);
      65             :   // Read what is contained in the pdb file
      66          98 :   parse("MOLTYPE",mytype);
      67             : 
      68             :   // check if whole
      69          98 :   parseFlag("WHOLE", iswhole_);
      70             : 
      71          98 :   auto* moldat=plumed.getActionSet().selectLatest<GenericMolInfo*>(this);
      72          98 :   if( moldat ) log<<"  overriding last MOLINFO with label " << moldat->getLabel()<<"\n";
      73             : 
      74             :   std::vector<AtomNumber> backbone;
      75         196 :   parseAtomList("CHAIN",backbone);
      76          98 :   if( read_backbone.size()==0 ) {
      77           0 :     for(unsigned i=1;; ++i) {
      78         196 :       parseAtomList("CHAIN",i,backbone);
      79          98 :       if( backbone.size()==0 ) break;
      80           0 :       read_backbone.push_back(backbone);
      81           0 :       backbone.resize(0);
      82             :     }
      83             :   } else {
      84           0 :     read_backbone.push_back(backbone);
      85             :   }
      86          98 :   if( read_backbone.size()==0 ) {
      87          98 :     parse("STRUCTURE",reference);
      88             : 
      89          98 :     if( ! pdb.read(reference,usingNaturalUnits(),0.1/getUnits().getLength()))plumed_merror("missing input file " + reference );
      90             : 
      91          98 :     std::vector<std::string> chains; pdb.getChainNames( chains );
      92          98 :     log.printf("  pdb file named %s contains %u chains \n",reference.c_str(), static_cast<unsigned>(chains.size()));
      93         289 :     for(unsigned i=0; i<chains.size(); ++i) {
      94             :       unsigned start,end; std::string errmsg;
      95         191 :       pdb.getResidueRange( chains[i], start, end, errmsg );
      96         191 :       if( errmsg.length()!=0 ) error( errmsg );
      97             :       AtomNumber astart,aend;
      98         191 :       pdb.getAtomRange( chains[i], astart, aend, errmsg );
      99         191 :       if( errmsg.length()!=0 ) error( errmsg );
     100         191 :       log.printf("  chain named %s contains residues %u to %u and atoms %u to %u \n",chains[i].c_str(),start,end,astart.serial(),aend.serial());
     101             :     }
     102             : 
     103             :     std::string python_bin;
     104         196 :     parse("PYTHON_BIN",python_bin);
     105          98 :     if(python_bin=="no") {
     106           0 :       log<<"  python interpreter disabled\n";
     107             :     } else {
     108         196 :       pythonCmd=config::getEnvCommand();
     109          98 :       if(python_bin!="default") {
     110           0 :         log<<"  forcing python interpreter: "<<python_bin<<"\n";
     111           0 :         pythonCmd+=" env PLUMED_PYTHON_BIN="+python_bin;
     112             :       }
     113             :       bool sorted=true;
     114          98 :       const auto & at=pdb.getAtomNumbers();
     115      102460 :       for(unsigned i=0; i<at.size(); i++) {
     116      102362 :         if(at[i].index()!=i) sorted=false;
     117             :       }
     118          98 :       if(!sorted) {
     119           2 :         log<<"  PDB is not sorted, python interpreter will be disabled\n";
     120          96 :       } else if(!Subprocess::available()) {
     121           0 :         log<<"  subprocess is not available, python interpreter will be disabled\n";
     122             :       } else {
     123          96 :         enablePythonInterpreter=true;
     124             :       }
     125             :     }
     126          98 :   }
     127          98 : }
     128             : 
     129           1 : std::map<std::string,std::string> GenericMolInfo::getSpecialKeywords() {
     130             :   std::map<std::string,std::string> mapkeys;
     131           1 :   mapkeys.insert( std::pair<std::string,std::string>("@mda:","atom selection built using syntax for <a href=\\\"https://docs.mdanalysis.org/stable/documentation_pages/selections.html\\\">MDAnalysis</a>") );
     132           1 :   mapkeys.insert( std::pair<std::string,std::string>("@mdt:","atom selection built using syntax for <a href=\\\"https://www.mdtraj.org/1.9.8.dev0/index.html\\\">mdtraj</a>") );
     133           1 :   mapkeys.insert( std::pair<std::string,std::string>("@vmdexec:","atom selection built using syntax for <a href=\\\"https://www.ks.uiuc.edu/Research/vmd/\\\">VMD</a>") );
     134           1 :   mapkeys.insert( std::pair<std::string,std::string>("@vmd:","atom selection built using syntax for <a href=\\\"https://www.ks.uiuc.edu/Research/vmd/\\\">VMD</a> python module") );
     135           1 :   mapkeys.insert( std::pair<std::string,std::string>("@nucleic","all atoms that are part of a DNA or RNA molecule") );
     136           1 :   mapkeys.insert( std::pair<std::string,std::string>("@protein","all atoms that are part of a protein") );
     137           1 :   mapkeys.insert( std::pair<std::string,std::string>("@water","all water molecules") );
     138           1 :   mapkeys.insert( std::pair<std::string,std::string>("@ions","all the ions") );
     139           1 :   mapkeys.insert( std::pair<std::string,std::string>("@hydrogens","all hydrogen atoms") );
     140           1 :   mapkeys.insert( std::pair<std::string,std::string>("@nonhydrogens","all non hydrogen atoms") );
     141           1 :   mapkeys.insert( std::pair<std::string,std::string>("@phi-","the four atoms that are required to calculate the phi dihedral for") );
     142           1 :   mapkeys.insert( std::pair<std::string,std::string>("@psi-","the four atoms that are required to calculate the psi dihedral for") );
     143           1 :   mapkeys.insert( std::pair<std::string,std::string>("@omega-","the four atoms that are required to calculate the omega dihedral for") );
     144           1 :   mapkeys.insert( std::pair<std::string,std::string>("@chi1-","the four atoms that are required to calculate the chi1 dihedral for") );
     145           1 :   mapkeys.insert( std::pair<std::string,std::string>("@chi2-","the four atoms that are required to calculate the chi2 dihedral for") );
     146           1 :   mapkeys.insert( std::pair<std::string,std::string>("@chi3-","the four atoms that are required to calculate the chi3 dihedral for") );
     147           1 :   mapkeys.insert( std::pair<std::string,std::string>("@chi4-","the four atoms that are required to calculate the chi4 dihedral for") );
     148           1 :   mapkeys.insert( std::pair<std::string,std::string>("@chi5-","the four atoms that are required to calculate the chi5 dihedral for") );
     149           1 :   mapkeys.insert( std::pair<std::string,std::string>("@sidechain-","the protein sidechain atoms in") );
     150           1 :   mapkeys.insert( std::pair<std::string,std::string>("@back-","the protein/dna/rna backbone atoms in") );
     151           1 :   mapkeys.insert( std::pair<std::string,std::string>("@alpha-","the four atoms that are required to calculate the alpha backbone dihedral for") );
     152           1 :   mapkeys.insert( std::pair<std::string,std::string>("@beta-","the four atoms that are required to calculate the beta backbone dihedral for") );
     153           1 :   mapkeys.insert( std::pair<std::string,std::string>("@gamma-","the four atoms that are required to calculate the gamma backbone dihedral for") );
     154           1 :   mapkeys.insert( std::pair<std::string,std::string>("@delta-","the four atoms that are required to calculate the delta backbone dihedral for") );
     155           1 :   mapkeys.insert( std::pair<std::string,std::string>("@epsilon-","the four atoms that are required to calculate the backbone epsilon dihedral for") );
     156           1 :   mapkeys.insert( std::pair<std::string,std::string>("@zeta-","the four atoms that are required to calculate the zeta backbone dihedral for") );
     157           1 :   mapkeys.insert( std::pair<std::string,std::string>("@v0-","the four atoms that are required to calculate the v0 sugar dihedral for") );
     158           1 :   mapkeys.insert( std::pair<std::string,std::string>("@v1-","the four atoms that are required to calculate the v1 sugar dihedral for") );
     159           1 :   mapkeys.insert( std::pair<std::string,std::string>("@v2-","the four atoms that are required to calculate the v2 sugar dihedral for") );
     160           1 :   mapkeys.insert( std::pair<std::string,std::string>("@v3-","the four atoms that are required to calculate the v3 sugar dihedral for") );
     161           1 :   mapkeys.insert( std::pair<std::string,std::string>("@v4-","the four atoms that are required to calculate the v4 sugar dihedral for") );
     162           1 :   mapkeys.insert( std::pair<std::string,std::string>("@chi-","the four atoms that are required to alcullate the chi dihedral for") );
     163           1 :   mapkeys.insert( std::pair<std::string,std::string>("@sugar-","the heavy atoms of the sugar in") );
     164           1 :   mapkeys.insert( std::pair<std::string,std::string>("@base-","the heavy atoms of the base in") );
     165           1 :   mapkeys.insert( std::pair<std::string,std::string>("@lcs-","an ordered triplet of atoms on the 6-membered ring of the nucleobase in") );
     166           1 :   return mapkeys;
     167             : }
     168             : 
     169          32 : void GenericMolInfo::getBackbone( std::vector<std::string>& restrings, const std::string& fortype, std::vector< std::vector<AtomNumber> >& backbone ) {
     170          32 :   if( fortype!=mytype ) error("cannot calculate a variable designed for " + fortype + " molecules for molecule type " + mytype );
     171          32 :   if( MolDataClass::numberOfAtomsPerResidueInBackbone( mytype )==0 ) error("backbone is not defined for molecule type " + mytype );
     172             : 
     173          32 :   if( read_backbone.size()!=0 ) {
     174           0 :     if( restrings.size()!=1 ) error("cannot interpret anything other than all for residues when using CHAIN keywords");
     175           0 :     if( restrings[0]!="all" ) error("cannot interpret anything other than all for residues when using CHAIN keywords");
     176           0 :     backbone.resize( read_backbone.size() );
     177           0 :     for(unsigned i=0; i<read_backbone.size(); ++i) {
     178           0 :       backbone[i].resize( read_backbone[i].size() );
     179           0 :       for(unsigned j=0; j<read_backbone[i].size(); ++j) backbone[i][j]=read_backbone[i][j];
     180             :     }
     181             :   } else {
     182             :     bool useter=false; // This is used to deal with terminal groups in WHOLEMOLECULES
     183          32 :     if( restrings.size()==1 ) {
     184          31 :       useter=( restrings[0].find("ter")!=std::string::npos );
     185          31 :       if( restrings[0].find("all")!=std::string::npos ) {
     186          30 :         std::vector<std::string> chains; pdb.getChainNames( chains );
     187         468 :         for(unsigned i=0; i<chains.size(); ++i) {
     188             :           unsigned r_start, r_end; std::string errmsg, mm, nn;
     189         438 :           pdb.getResidueRange( chains[i], r_start, r_end, errmsg );
     190         438 :           if( !useter ) {
     191         438 :             std::string resname = pdb.getResidueName( r_start );
     192         438 :             if( MolDataClass::isTerminalGroup( mytype, resname ) ) r_start++;
     193         438 :             resname = pdb.getResidueName( r_end );
     194         438 :             if( MolDataClass::isTerminalGroup( mytype, resname ) ) r_end--;
     195             :           }
     196         438 :           Tools::convert(r_start,mm); Tools::convert(r_end,nn);
     197         468 :           if(i==0) restrings[0] = mm + "-" + nn;
     198         816 :           else restrings.push_back(  mm + "-" + nn );
     199             :         }
     200          30 :       }
     201             :     }
     202          32 :     Tools::interpretRanges(restrings);
     203             : 
     204             :     // Convert the list of involved residues into a list of segments of chains
     205             :     int nk, nj; std::vector< std::vector<unsigned> > segments;
     206             :     std::vector<unsigned> thissegment;
     207          32 :     Tools::convert(restrings[0],nk); thissegment.push_back(nk);
     208        3540 :     for(unsigned i=1; i<restrings.size(); ++i) {
     209        3508 :       Tools::convert(restrings[i-1],nk);
     210        3508 :       Tools::convert(restrings[i],nj);
     211        9706 :       if( (nk+1)!=nj || pdb.getChainID(nk)!=pdb.getChainID(nj) ) {
     212         409 :         segments.push_back(thissegment);
     213         409 :         thissegment.resize(0);
     214             :       }
     215        3508 :       thissegment.push_back(nj);
     216             :     }
     217          32 :     segments.push_back( thissegment );
     218             : 
     219             :     // And now get the backbone atoms from each segment
     220          32 :     backbone.resize( segments.size() );
     221             :     std::vector<AtomNumber> atomnumbers;
     222         473 :     for(unsigned i=0; i<segments.size(); ++i) {
     223        3981 :       for(unsigned j=0; j<segments[i].size(); ++j) {
     224        3540 :         std::string resname=pdb.getResidueName( segments[i][j] );
     225        3540 :         if( !MolDataClass::allowedResidue(mytype, resname) ) {
     226           0 :           std::string num; Tools::convert( segments[i][j], num );
     227           0 :           error("residue " + num + " is not recognized for moltype " + mytype );
     228             :         }
     229        3540 :         if( !useter && MolDataClass::isTerminalGroup( mytype, resname ) ) {
     230           0 :           std::string num; Tools::convert( segments[i][j], num );
     231           0 :           error("residue " + num + " appears to be a terminal group");
     232             :         }
     233        3540 :         if( resname=="GLY" ) warning("GLY residues are achiral - assuming HA1 atom is in CB position");
     234        3540 :         MolDataClass::getBackboneForResidue( mytype, segments[i][j], pdb, atomnumbers );
     235        3540 :         if( atomnumbers.size()==0 ) {
     236           0 :           std::string num; Tools::convert( segments[i][j], num );
     237           0 :           error("Could not find required backbone atom in residue number " + num );
     238             :         } else {
     239       21240 :           for(unsigned k=0; k<atomnumbers.size(); ++k) backbone[i].push_back( atomnumbers[k] );
     240             :         }
     241        3540 :         atomnumbers.resize(0);
     242             :       }
     243             :     }
     244          32 :   }
     245          32 : }
     246             : 
     247         661 : void GenericMolInfo::interpretSymbol( const std::string& symbol, std::vector<AtomNumber>& atoms ) {
     248        2590 :   if(Tools::startWith(symbol,"mdt:") || Tools::startWith(symbol,"mda:") || Tools::startWith(symbol,"vmd:") || Tools::startWith(symbol,"vmdexec:")) {
     249             : 
     250          24 :     plumed_assert(enablePythonInterpreter);
     251             : 
     252          48 :     log<<"  symbol " + symbol + " will be sent to python interpreter\n";
     253          24 :     if(!selector_running) {
     254           3 :       log<<"  MOLINFO "<<getLabel()<<": starting python interpreter\n";
     255           3 :       if(comm.Get_rank()==0) {
     256           4 :         selector=Tools::make_unique<Subprocess>(pythonCmd+" \""+config::getPlumedRoot()+"\"/scripts/selector.sh --pdb " + reference);
     257           2 :         selector->stop();
     258             :       }
     259           3 :       selector_running=true;
     260             :     }
     261             : 
     262          24 :     atoms.resize(0);
     263             : 
     264          24 :     if(comm.Get_rank()==0) {
     265          16 :       int ok=0;
     266             :       std::string error_msg;
     267             :       // this is a complicated way to have the exception propagated with MPI.
     268             :       // It is necessary since only one process calls the selector.
     269             :       // Probably I should recycle the exception propagation at library boundaries
     270             :       // to allow transferring the exception to other processes.
     271             :       try {
     272          16 :         plumed_assert(selector) << "Python interpreter is disabled, selection " + symbol + " cannot be interpreted";
     273             :         auto h=selector->contStop(); // stops again when it goes out of scope
     274          16 :         (*selector) << symbol << "\n";
     275          16 :         selector->flush();
     276             :         std::string res;
     277             :         std::vector<std::string> words;
     278             :         while(true) {
     279          16 :           selector->getline(res);
     280          32 :           words=Tools::getWords(res);
     281          32 :           if(!words.empty() && words[0]=="Error") plumed_error()<<res;
     282          32 :           if(!words.empty() && words[0]=="Selection:") break;
     283             :         }
     284             :         words.erase(words.begin());
     285         712 :         for(const auto & w : words) {
     286             :           int n;
     287         696 :           if(w.empty()) continue;
     288         696 :           Tools::convert(w,n);
     289        1392 :           atoms.push_back(AtomNumber::serial(n));
     290             :         }
     291          16 :         ok=1;
     292          32 :       } catch (const Exception & e) {
     293           0 :         error_msg=e.what();
     294           0 :       }
     295          16 :       comm.Bcast(ok,0);
     296          16 :       if(!ok) {
     297           0 :         size_t s=error_msg.length();
     298           0 :         comm.Bcast(s,0);
     299           0 :         comm.Bcast(error_msg,0);
     300           0 :         throw Exception()<<error_msg;
     301             :       }
     302          16 :       size_t nat=atoms.size();
     303          16 :       comm.Bcast(nat,0);
     304          16 :       comm.Bcast(atoms,0);
     305             :     } else {
     306           8 :       int ok=0;
     307             :       std::string error_msg;
     308           8 :       comm.Bcast(ok,0);
     309           8 :       if(!ok) {
     310             :         size_t s;
     311           0 :         comm.Bcast(s,0);
     312           0 :         error_msg.resize(s);
     313           0 :         comm.Bcast(error_msg,0);
     314           0 :         throw Exception()<<error_msg;
     315             :       }
     316           8 :       size_t nat=0;
     317           8 :       comm.Bcast(nat,0);
     318           8 :       atoms.resize(nat);
     319           8 :       comm.Bcast(atoms,0);
     320             :     }
     321          24 :     log<<"  selection interpreted using ";
     322          54 :     if(Tools::startWith(symbol,"mdt:")) log<<"mdtraj "<<cite("McGibbon et al, Biophys. J., 109, 1528 (2015)")<<"\n";
     323          66 :     if(Tools::startWith(symbol,"mda:")) log<<"MDAnalysis "<<cite("Gowers et al, Proceedings of the 15th Python in Science Conference, doi:10.25080/majora-629e541a-00e (2016)")<<"\n";
     324          48 :     if(Tools::startWith(symbol,"vmdexec:")) log<<"VMD "<<cite("Humphrey, Dalke, and Schulten, K., J. Molec. Graphics, 14, 33 (1996)")<<"\n";
     325          48 :     if(Tools::startWith(symbol,"vmd:")) log<<"VMD (github.com/Eigenstate/vmd-python) "<<cite("Humphrey, Dalke, and Schulten, K., J. Molec. Graphics, 14, 33 (1996)")<<"\n";
     326          24 :     return;
     327             :   }
     328         637 :   MolDataClass::specialSymbol( mytype, symbol, pdb, atoms );
     329         637 :   if(atoms.empty()) error(symbol + " not found in your MOLINFO structure");
     330             : }
     331             : 
     332       68097 : std::string GenericMolInfo::getAtomName(AtomNumber a)const {
     333       68097 :   return pdb.getAtomName(a);
     334             : }
     335             : 
     336         249 : bool GenericMolInfo::checkForAtom(AtomNumber a)const {
     337         249 :   return pdb.checkForAtom(a);
     338             : }
     339             : 
     340       61001 : unsigned GenericMolInfo::getResidueNumber(AtomNumber a)const {
     341       61001 :   return pdb.getResidueNumber(a);
     342             : }
     343             : 
     344       17337 : unsigned GenericMolInfo::getPDBsize()const {
     345       17337 :   return pdb.size();
     346             : }
     347             : 
     348       14949 : std::string GenericMolInfo::getResidueName(AtomNumber a)const {
     349       14949 :   return pdb.getResidueName(a);
     350             : }
     351             : 
     352           0 : std::string GenericMolInfo::getChainID(AtomNumber a)const {
     353           0 :   return pdb.getChainID(a);
     354             : }
     355             : 
     356         186 : Vector GenericMolInfo::getPosition(AtomNumber a)const {
     357         186 :   return pdb.getPosition(a);
     358             : }
     359             : 
     360           2 : bool GenericMolInfo::isWhole() const {
     361           2 :   return iswhole_;
     362             : }
     363             : 
     364        5357 : void GenericMolInfo::prepare() {
     365        5357 :   if(selector_running) {
     366           3 :     log<<"  MOLINFO "<<getLabel()<<": killing python interpreter\n";
     367             :     selector.reset();
     368           3 :     selector_running=false;
     369             :   }
     370        5357 : }
     371             : 
     372             : }

Generated by: LCOV version 1.16