LCOV - code coverage report
Current view: top level - tools - PDB.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 220 418 52.6 %
Date: 2024-10-18 14:00:25 Functions: 27 44 61.4 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2011-2023 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : #include "PDB.h"
      23             : #include "Tools.h"
      24             : #include "Log.h"
      25             : #include "h36.h"
      26             : #include <cstdio>
      27             : #include <iostream>
      28             : #include "core/GenericMolInfo.h"
      29             : #include "Tensor.h"
      30             : 
      31             : //+PLUMEDOC INTERNAL pdbreader
      32             : /*
      33             : PLUMED can use the PDB format in several places
      34             : 
      35             : - To read molecular structure (\ref MOLINFO).
      36             : - To read reference conformations (\ref RMSD, but also many other methods in \ref dists, \ref FIT_TO_TEMPLATE, etc).
      37             : 
      38             : The implemented PDB reader expects a file formatted correctly according to the
      39             : [PDB standard](http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html).
      40             : In particular, the following columns are read from ATOM records
      41             : \verbatim
      42             : columns | content
      43             : 1-6     | record name (ATOM or HETATM)
      44             : 7-11    | serial number of the atom (starting from 1)
      45             : 13-16   | atom name
      46             : 18-20   | residue name
      47             : 22      | chain id
      48             : 23-26   | residue number
      49             : 31-38   | x coordinate
      50             : 39-46   | y coordinate
      51             : 47-54   | z coordinate
      52             : 55-60   | occupancy
      53             : 61-66   | beta factor
      54             : \endverbatim
      55             : PLUMED parser is slightly more permissive than the official PDB format
      56             : in the fact that the format of real numbers is not fixed. In other words,
      57             : any real number that can be parsed is OK and the dot can be placed anywhere. However,
      58             : __columns are interpret strictly__. A sample PDB should look like the following
      59             : \verbatim
      60             : ATOM      2  CH3 ACE     1      12.932 -14.718  -6.016  1.00  1.00
      61             : ATOM      5  C   ACE     1      21.312  -9.928  -5.946  1.00  1.00
      62             : ATOM      9  CA  ALA     2      19.462 -11.088  -8.986  1.00  1.00
      63             : \endverbatim
      64             : 
      65             : Notice that serial numbers need not to be consecutive. In the three-line example above,
      66             : only the coordinates of three atoms are provided. This is perfectly legal and indicates PLUMED
      67             : that information about these atoms only is available. This could be both for structural
      68             : information in \ref MOLINFO, where the other atoms would have no name assigned, and for
      69             : reference structures used in \ref RMSD, where only the provided atoms would be used to compute RMSD.
      70             : 
      71             : \par Occupancy and beta factors
      72             : 
      73             : PLUMED reads also occupancy and beta factors that however are given a very special meaning.
      74             : In cases where the PDB structure is used as a reference for an alignment (that's the case
      75             : for instance in \ref RMSD and in \ref FIT_TO_TEMPLATE), the occupancy column is used
      76             : to provide the weight of each atom in the alignment. In cases where, perhaps after alignment,
      77             : the displacement between running coordinates and the provided PDB is computed, the beta factors
      78             : are used as weight for the displacement.
      79             : Since setting the weights to zero is the same as __not__ including an atom in the alignment or
      80             : displacement calculation, the two following reference files would be equivalent when used in an \ref RMSD
      81             : calculation. First file:
      82             : \verbatim
      83             : ATOM      2  CH3 ACE     1      12.932 -14.718  -6.016  1.00  1.00
      84             : ATOM      5  C   ACE     1      21.312  -9.928  -5.946  1.00  1.00
      85             : ATOM      9  CA  ALA     2      19.462 -11.088  -8.986  0.00  0.00
      86             : \endverbatim
      87             : Second file:
      88             : \verbatim
      89             : ATOM      2  CH3 ACE     1      12.932 -14.718  -6.016  1.00  1.00
      90             : ATOM      5  C   ACE     1      21.312  -9.928  -5.946  1.00  1.00
      91             : \endverbatim
      92             : However notice that many extra atoms with zero weight might slow down the calculation, so
      93             : removing lines is better than setting their weights to zero.
      94             : In addition, weights for alignment need not to be equivalent to weights for displacement.
      95             : Starting with PLUMED 2.7, if all the weights are set to zero they will be normalized to be equal to the
      96             : inverse of the number of involved atoms. This means that it will be possible to use files with
      97             : the weight columns set to zero obtaining a meaningful result. In previous PLUMED versions,
      98             : setting all weights to zero was resulting in an error instead.
      99             : 
     100             : 
     101             : \par Systems with more than 100k atoms
     102             : 
     103             : Notice that it very likely does not make any sense to compute the \ref RMSD or any other structural
     104             : deviation __using__ so many atoms. However, if the protein for which you want to compute \ref RMSD
     105             : has atoms with large serial numbers (e.g. because it is located __after__ solvent in the sorted list of atoms)
     106             : you might end up with troubles with the limitations of the PDB format. Indeed, since there are 5
     107             : columns available for atom serial number, this number cannot be larger than 99999.
     108             : In addition, providing \ref MOLINFO with names associated to atoms with a serial larger than 99999 would be impossible.
     109             : 
     110             : Since PLUMED 2.4 we allow [hybrid 36](http://cci.lbl.gov/hybrid_36/) format to be used to specify atom numbers.
     111             : This format is not particularly widespread, but has the nice feature that it provides a one-to-one mapping
     112             : between numbers up to approximately 80 millions and strings with 5 characters, plus it is backward compatible
     113             : for numbers smaller than 100000. This is not true for notations like the hex notation exported by VMD.
     114             : Using the hybrid 36 format, the ATOM records for atom ranging from 99997 to 100002 would read like these:
     115             : \verbatim
     116             : ATOM  99997  Ar      X   1      45.349  38.631  15.116  1.00  1.00
     117             : ATOM  99998  Ar      X   1      46.189  38.631  15.956  1.00  1.00
     118             : ATOM  99999  Ar      X   1      46.189  39.471  15.116  1.00  1.00
     119             : ATOM  A0000  Ar      X   1      45.349  39.471  15.956  1.00  1.00
     120             : ATOM  A0000  Ar      X   1      45.349  38.631  16.796  1.00  1.00
     121             : ATOM  A0001  Ar      X   1      46.189  38.631  17.636  1.00  1.00
     122             : \endverbatim
     123             : There are tools that can be found to translate from integers to strings and back using hybrid 36 format
     124             : (a simple python script can be found [here](https://sourceforge.net/p/cctbx/code/HEAD/tree/trunk/iotbx/pdb/hybrid_36.py)).
     125             : In addition, as of PLUMED 2.5, we provide a \ref pdbrenumber "command line tool" that can be used to renumber atoms in a PDB file.
     126             : 
     127             : */
     128             : //+ENDPLUMEDOC
     129             : 
     130             : 
     131             : namespace PLMD {
     132             : 
     133           0 : void PDB::setAtomNumbers( const std::vector<AtomNumber>& atoms ) {
     134           0 :   positions.resize( atoms.size() ); occupancy.resize( atoms.size() );
     135           0 :   beta.resize( atoms.size() ); numbers.resize( atoms.size() );
     136           0 :   for(unsigned i=0; i<atoms.size(); ++i) { numbers[i]=atoms[i]; beta[i]=1.0; occupancy[i]=1.0; }
     137           0 : }
     138             : 
     139           0 : void PDB::setArgumentNames( const std::vector<std::string>& argument_names ) {
     140           0 :   argnames.resize( argument_names.size() ); std::vector<double> tmp(1,0);
     141           0 :   for(unsigned i=0; i<argument_names.size(); ++i) {
     142             :     argnames[i]=argument_names[i];
     143           0 :     arg_data.insert( std::pair<std::string,std::vector<double> >( argnames[i], tmp ) );
     144             :   }
     145           0 : }
     146             : 
     147        2148 : bool PDB::getArgumentValue( const std::string& name, std::vector<double>& value ) const {
     148             :   std::map<std::string,std::vector<double> >::const_iterator it = arg_data.find(name);
     149        2148 :   if( it!=arg_data.end() ) {
     150        2148 :     if( value.size()!=it->second.size() ) return false;
     151        4300 :     for(unsigned i=0; i<value.size(); ++i) value[i] = it->second[i];
     152             :     return true;
     153             :   }
     154             :   return false;
     155             : }
     156             : 
     157           0 : void PDB::setAtomPositions( const std::vector<Vector>& pos ) {
     158           0 :   plumed_assert( pos.size()==positions.size() );
     159           0 :   for(unsigned i=0; i<positions.size(); ++i) positions[i]=pos[i];
     160           0 : }
     161             : 
     162           0 : void PDB::setArgumentValue( const std::string& argname, const double& val ) {
     163             :   // First set the value of the value of the argument in the map
     164           0 :   arg_data.find(argname)->second.resize(1);
     165           0 :   arg_data.find(argname)->second[0] = val;
     166           0 : }
     167             : 
     168             : // bool PDB::hasRequiredProperties( const std::vector<std::string>& inproperties ){
     169             : //   bool hasprop=false;
     170             : //   for(unsigned i=0;i<remark.size();++i){
     171             : //       if( remark[i].find("PROPERTIES=")!=std::string::npos){ hasprop=true; break; }
     172             : //   }
     173             : //   if( !hasprop ){
     174             : //       std::string mypropstr="PROPERTIES=" + inproperties[0];
     175             : //       for(unsigned i=1;i<inproperties.size();++i) mypropstr += "," + inproperties[i];
     176             : //       remark.push_back( mypropstr );
     177             : //   }
     178             : //   // Now check that all required properties are there
     179             : //   for(unsigned i=0;i<inproperties.size();++i){
     180             : //       hasprop=false;
     181             : //       for(unsigned j=0;j<remark.size();++j){
     182             : //           if( remark[j].find(inproperties[i]+"=")!=std::string::npos){ hasprop=true; break; }
     183             : //       }
     184             : //       if( !hasprop ) return false;
     185             : //   }
     186             : //   return true;
     187             : // }
     188             : 
     189           0 : void PDB::addBlockEnd( const unsigned& end ) {
     190           0 :   block_ends.push_back( end );
     191           0 : }
     192             : 
     193           3 : unsigned PDB::getNumberOfAtomBlocks()const {
     194           3 :   return block_ends.size();
     195             : }
     196             : 
     197           6 : const std::vector<unsigned> & PDB::getAtomBlockEnds()const {
     198           6 :   return block_ends;
     199             : }
     200             : 
     201    23575319 : const std::vector<Vector> & PDB::getPositions()const {
     202    23575319 :   return positions;
     203             : }
     204             : 
     205           0 : void PDB::setPositions(const std::vector<Vector> &v ) {
     206           0 :   plumed_assert( v.size()==positions.size() );
     207           0 :   positions=v;
     208           0 : }
     209             : 
     210      153889 : const std::vector<double> & PDB::getOccupancy()const {
     211      153889 :   return occupancy;
     212             : }
     213             : 
     214      152222 : const std::vector<double> & PDB::getBeta()const {
     215      152222 :   return beta;
     216             : }
     217             : 
     218        4603 : void PDB::addRemark( std::vector<std::string>& v1 ) {
     219        4603 :   Tools::parse(v1,"TYPE",mtype);
     220        4603 :   Tools::parseVector(v1,"ARG",argnames);
     221       12369 :   for(unsigned i=0; i<v1.size(); ++i) {
     222        7766 :     if( v1[i].find("=")!=std::string::npos ) {
     223             :       std::size_t eq=v1[i].find_first_of('=');
     224        7164 :       std::string name=v1[i].substr(0,eq);
     225        7164 :       std::string sval=v1[i].substr(eq+1);
     226             :       // double val; Tools::convert( sval, val );
     227        7164 :       std::vector<std::string> words=Tools::getWords(sval,"\t\n ,");
     228        7164 :       std::vector<double> val( words.size() );
     229       14336 :       for(unsigned i=0; i<val.size(); ++i) Tools::convert( words[i], val[i] );
     230       14328 :       arg_data.insert( std::pair<std::string,std::vector<double> >( name, val ) );
     231        7164 :     } else {
     232         602 :       flags.push_back(v1[i]);
     233             :     }
     234             :   }
     235        4603 : }
     236             : 
     237           0 : bool PDB::hasFlag( const std::string& fname ) const {
     238           0 :   for(unsigned i=0; i<flags.size(); ++i) {
     239           0 :     if( flags[i]==fname ) return true;
     240             :   }
     241             :   return false;
     242             : }
     243             : 
     244             : 
     245      314593 : const std::vector<AtomNumber> & PDB::getAtomNumbers()const {
     246      314593 :   return numbers;
     247             : }
     248             : 
     249           0 : const Vector & PDB::getBoxAxs()const {
     250           0 :   return BoxXYZ;
     251             : }
     252             : 
     253           0 : const Vector & PDB::getBoxAng()const {
     254           0 :   return BoxABG;
     255             : }
     256             : 
     257          12 : const Tensor & PDB::getBoxVec()const {
     258          12 :   return Box;
     259             : }
     260             : 
     261     6941325 : std::string PDB::getAtomName(AtomNumber a)const {
     262             :   const auto p=number2index.find(a);
     263     6941325 :   if(p==number2index.end()) {
     264           0 :     std::string num; Tools::convert( a.serial(), num );
     265           0 :     plumed_merror("Name of atom " + num + " not found" );
     266     6941325 :   } else return atomsymb[p->second];
     267             : }
     268             : 
     269      166219 : unsigned PDB::getResidueNumber(AtomNumber a)const {
     270             :   const auto p=number2index.find(a);
     271      166219 :   if(p==number2index.end()) {
     272           0 :     std::string num; Tools::convert( a.serial(), num );
     273           0 :     plumed_merror("Residue for atom " + num + " not found" );
     274      166219 :   } else return residue[p->second];
     275             : }
     276             : 
     277      180036 : std::string PDB::getResidueName(AtomNumber a) const {
     278             :   const auto p=number2index.find(a);
     279      180036 :   if(p==number2index.end()) {
     280           0 :     std::string num; Tools::convert( a.serial(), num );
     281           0 :     plumed_merror("Residue for atom " + num + " not found" );
     282      180036 :   } else return residuenames[p->second];
     283             : }
     284             : 
     285   233314275 : unsigned PDB::size()const {
     286   233314275 :   return positions.size();
     287             : }
     288             : 
     289        4561 : bool PDB::readFromFilepointer(FILE *fp,bool naturalUnits,double scale) {
     290             :   //cerr<<file<<endl;
     291             :   bool file_is_alive=false;
     292        4561 :   if(naturalUnits) scale=1.0;
     293             :   std::string line;
     294             :   fpos_t pos; bool between_ters=true;
     295      457393 :   while(Tools::getline(fp,line)) {
     296             :     //cerr<<line<<"\n";
     297      456889 :     fgetpos (fp,&pos);
     298     3320290 :     while(line.length()<80) line.push_back(' ');
     299      456889 :     std::string record=line.substr(0,6);
     300      456889 :     std::string serial=line.substr(6,5);
     301      456889 :     std::string atomname=line.substr(12,4);
     302      456889 :     std::string residuename=line.substr(17,3);
     303      456889 :     std::string chainID=line.substr(21,1);
     304      456889 :     std::string resnum=line.substr(22,4);
     305      456889 :     std::string x=line.substr(30,8);
     306      456889 :     std::string y=line.substr(38,8);
     307      456889 :     std::string z=line.substr(46,8);
     308      456889 :     std::string occ=line.substr(54,6);
     309      456889 :     std::string bet=line.substr(60,6);
     310      456889 :     std::string BoxX=line.substr(6,9);
     311      456889 :     std::string BoxY=line.substr(15,9);
     312      456889 :     std::string BoxZ=line.substr(24,9);
     313      456889 :     std::string BoxA=line.substr(33,7);
     314      456889 :     std::string BoxB=line.substr(40,7);
     315      456889 :     std::string BoxG=line.substr(47,7);
     316      456889 :     Tools::trim(record);
     317      456889 :     if(record=="TER") { between_ters=false; block_ends.push_back( positions.size() ); }
     318      456889 :     if(record=="END") { file_is_alive=true;  break;}
     319      452975 :     if(record=="ENDMDL") { file_is_alive=true;  break;}
     320      452832 :     if(record=="REMARK") {
     321        9206 :       std::vector<std::string> v1;  v1=Tools::getWords(line.substr(6));
     322        4603 :       addRemark( v1 );
     323        4603 :     }
     324      452832 :     if(record=="CRYST1") {
     325          79 :       Tools::convert(BoxX,BoxXYZ[0]);
     326          79 :       Tools::convert(BoxY,BoxXYZ[1]);
     327          79 :       Tools::convert(BoxZ,BoxXYZ[2]);
     328          79 :       Tools::convert(BoxA,BoxABG[0]);
     329          79 :       Tools::convert(BoxB,BoxABG[1]);
     330          79 :       Tools::convert(BoxG,BoxABG[2]);
     331          79 :       BoxXYZ*=scale;
     332          79 :       double cosA=std::cos(BoxABG[0]*pi/180.);
     333          79 :       double cosB=std::cos(BoxABG[1]*pi/180.);
     334          79 :       double cosG=std::cos(BoxABG[2]*pi/180.);
     335          79 :       double sinG=std::sin(BoxABG[2]*pi/180.);
     336         316 :       for (unsigned i=0; i<3; i++) {Box[i][0]=0.; Box[i][1]=0.; Box[i][2]=0.;}
     337          79 :       Box[0][0]=BoxXYZ[0];
     338          79 :       Box[1][0]=BoxXYZ[1]*cosG;
     339          79 :       Box[1][1]=BoxXYZ[1]*sinG;
     340          79 :       Box[2][0]=BoxXYZ[2]*cosB;
     341          79 :       Box[2][1]=(BoxXYZ[2]*BoxXYZ[1]*cosA-Box[2][0]*Box[1][0])/Box[1][1];
     342          79 :       Box[2][2]=std::sqrt(BoxXYZ[2]*BoxXYZ[2]-Box[2][0]*Box[2][0]-Box[2][1]*Box[2][1]);
     343             :     }
     344      458709 :     if(record=="ATOM" || record=="HETATM") {
     345             :       between_ters=true;
     346             :       AtomNumber a;
     347      446955 :       unsigned resno=0; // GB: when resnum string is not present, we set res number to zero
     348             :       double o,b;
     349      446955 :       Vector p;
     350             :       {
     351             :         int result;
     352      446955 :         auto trimmed=serial;
     353      446955 :         Tools::trim(trimmed);
     354      447017 :         while(trimmed.length()<5) trimmed = std::string(" ") + trimmed;
     355      446955 :         const char* errmsg = h36::hy36decode(5, trimmed.c_str(),trimmed.length(), &result);
     356      446955 :         if(errmsg) {
     357           0 :           std::string msg(errmsg);
     358           0 :           plumed_merror(msg);
     359             :         }
     360      446955 :         a.setSerial(result);
     361             :       }
     362             : 
     363             :       // allow skipping residue number
     364             :       {
     365      446955 :         auto trimmed=resnum;
     366      446955 :         Tools::trim(trimmed);
     367      446955 :         if(trimmed.length()>0) {
     368             :           int result;
     369      446955 :           while(trimmed.length()<4) trimmed = std::string(" ") + trimmed;
     370      446955 :           const char* errmsg = h36::hy36decode(4, trimmed.c_str(),trimmed.length(), &result);
     371      446955 :           if(errmsg) {
     372           0 :             std::string msg(errmsg);
     373           0 :             plumed_merror(msg);
     374             :           }
     375      446955 :           resno=result;
     376             :         }
     377             :       }
     378             : 
     379      446955 :       Tools::convert(occ,o);
     380      446955 :       Tools::convert(bet,b);
     381      446955 :       Tools::convert(x,p[0]);
     382      446955 :       Tools::convert(y,p[1]);
     383      446955 :       Tools::convert(z,p[2]);
     384             :       // scale into nm
     385      446955 :       p*=scale;
     386      446955 :       numbers.push_back(a);
     387      446955 :       number2index[a]=positions.size();
     388      446955 :       std::size_t startpos=atomname.find_first_not_of(" \t");
     389      446955 :       std::size_t endpos=atomname.find_last_not_of(" \t");
     390      446955 :       atomsymb.push_back( atomname.substr(startpos, endpos-startpos+1) );
     391      446955 :       residue.push_back(resno);
     392      446955 :       chain.push_back(chainID);
     393      446955 :       occupancy.push_back(o);
     394      446955 :       beta.push_back(b);
     395      446955 :       positions.push_back(p);
     396      446955 :       residuenames.push_back(residuename);
     397             :     }
     398             :   }
     399        4561 :   if( between_ters ) block_ends.push_back( positions.size() );
     400        4561 :   return file_is_alive;
     401             : }
     402             : 
     403         436 : bool PDB::read(const std::string&file,bool naturalUnits,double scale) {
     404         436 :   FILE* fp=std::fopen(file.c_str(),"r");
     405         436 :   if(!fp) return false;
     406             : // call fclose when exiting this function
     407         435 :   auto deleter=[](auto f) { std::fclose(f); };
     408             :   std::unique_ptr<FILE,decltype(deleter)> fp_deleter(fp,deleter);
     409         435 :   readFromFilepointer(fp,naturalUnits,scale);
     410             :   return true;
     411             : }
     412             : 
     413         270 : void PDB::getChainNames( std::vector<std::string>& chains ) const {
     414         270 :   chains.resize(0);
     415         270 :   chains.push_back( chain[0] );
     416      549332 :   for(unsigned i=1; i<size(); ++i) {
     417      549062 :     if( chains[chains.size()-1]!=chain[i] ) chains.push_back( chain[i] );
     418             :   }
     419         270 : }
     420             : 
     421       11761 : void PDB::getResidueRange( const std::string& chainname, unsigned& res_start, unsigned& res_end, std::string& errmsg ) const {
     422             :   bool inres=false, foundchain=false;
     423    30698435 :   for(unsigned i=0; i<size(); ++i) {
     424    30686674 :     if( chain[i]==chainname ) {
     425    26688896 :       if(!inres) {
     426       11761 :         if(foundchain) errmsg="found second start of chain named " + chainname;
     427       11761 :         res_start=residue[i];
     428             :       }
     429             :       inres=true; foundchain=true;
     430     3997778 :     } else if( inres && chain[i]!=chainname ) {
     431             :       inres=false;
     432       11221 :       res_end=residue[i-1];
     433             :     }
     434             :   }
     435       11761 :   if(inres) res_end=residue[size()-1];
     436       11761 : }
     437             : 
     438         191 : void PDB::getAtomRange( const std::string& chainname, AtomNumber& a_start, AtomNumber& a_end, std::string& errmsg ) const {
     439             :   bool inres=false, foundchain=false;
     440      496023 :   for(unsigned i=0; i<size(); ++i) {
     441      495832 :     if( chain[i]==chainname ) {
     442      102362 :       if(!inres) {
     443         191 :         if(foundchain) errmsg="found second start of chain named " + chainname;
     444         191 :         a_start=numbers[i];
     445             :       }
     446             :       inres=true; foundchain=true;
     447      393470 :     } else if( inres && chain[i]!=chainname ) {
     448             :       inres=false;
     449          93 :       a_end=numbers[i-1];
     450             :     }
     451             :   }
     452         191 :   if(inres) a_end=numbers[size()-1];
     453         191 : }
     454             : 
     455        7956 : std::string PDB::getResidueName( const unsigned& resnum ) const {
     456     9758352 :   for(unsigned i=0; i<size(); ++i) {
     457     9758352 :     if( residue[i]==resnum ) return residuenames[i];
     458             :   }
     459           0 :   std::string num; Tools::convert( resnum, num );
     460           0 :   plumed_merror("residue " + num + " not found" );
     461             : }
     462             : 
     463       49958 : std::string PDB::getResidueName(const unsigned& resnum,const std::string& chainid ) const {
     464    64916453 :   for(unsigned i=0; i<size(); ++i) {
     465    64966591 :     if( residue[i]==resnum && ( chainid=="*" || chain[i]==chainid) ) return residuenames[i];
     466             :   }
     467           0 :   std::string num; Tools::convert( resnum, num );
     468           0 :   plumed_merror("residue " + num + " not found in chain " + chainid );
     469             : }
     470             : 
     471             : 
     472       17700 : AtomNumber PDB::getNamedAtomFromResidue( const std::string& aname, const unsigned& resnum ) const {
     473    21799572 :   for(unsigned i=0; i<size(); ++i) {
     474    21799572 :     if( residue[i]==resnum && atomsymb[i]==aname ) return numbers[i];
     475             :   }
     476           0 :   std::string num; Tools::convert( resnum, num );
     477           0 :   plumed_merror("residue " + num + " does not contain an atom named " + aname );
     478             : }
     479             : 
     480        2394 : AtomNumber PDB::getNamedAtomFromResidueAndChain( const std::string& aname, const unsigned& resnum, const std::string& chainid ) const {
     481     1087620 :   for(unsigned i=0; i<size(); ++i) {
     482     1090014 :     if( residue[i]==resnum && atomsymb[i]==aname && ( chainid=="*" || chain[i]==chainid) ) return numbers[i];
     483             :   }
     484           0 :   std::string num; Tools::convert( resnum, num );
     485           0 :   plumed_merror("residue " + num + " from chain " + chainid + " does not contain an atom named " + aname );
     486             : }
     487             : 
     488       35470 : std::vector<AtomNumber> PDB::getAtomsInResidue(const unsigned& resnum,const std::string& chainid)const {
     489             :   std::vector<AtomNumber> tmp;
     490    95754470 :   for(unsigned i=0; i<size(); ++i) {
     491    96261470 :     if( residue[i]==resnum && ( chainid=="*" || chain[i]==chainid) ) tmp.push_back(numbers[i]);
     492             :   }
     493       35470 :   if(tmp.size()==0) {
     494           0 :     std::string num; Tools::convert( resnum, num );
     495           0 :     plumed_merror("Cannot find residue " + num + " from chain " + chainid  );
     496             :   }
     497       35470 :   return tmp;
     498             : }
     499             : 
     500           0 : std::vector<AtomNumber> PDB::getAtomsInChain(const std::string& chainid)const {
     501             :   std::vector<AtomNumber> tmp;
     502           0 :   for(unsigned i=0; i<size(); ++i) {
     503           0 :     if( chainid=="*" || chain[i]==chainid ) tmp.push_back(numbers[i]);
     504             :   }
     505           0 :   if(tmp.size()==0) {
     506           0 :     plumed_merror("Cannot find atoms from chain " + chainid  );
     507             :   }
     508           0 :   return tmp;
     509             : }
     510             : 
     511        6198 : std::string PDB::getChainID(const unsigned& resnumber) const {
     512     7587064 :   for(unsigned i=0; i<size(); ++i) {
     513     7587064 :     if(resnumber==residue[i]) return chain[i];
     514             :   }
     515           0 :   plumed_merror("Not enough residues in pdb input file");
     516             : }
     517             : 
     518           0 : std::string PDB::getChainID(AtomNumber a) const {
     519             :   const auto p=number2index.find(a);
     520           0 :   if(p==number2index.end()) {
     521           0 :     std::string num; Tools::convert( a.serial(), num );
     522           0 :     plumed_merror("Chain for atom " + num + " not found" );
     523             :   }
     524           0 :   return chain[p->second];
     525             : }
     526             : 
     527           0 : bool PDB::checkForResidue( const std::string& name ) const {
     528           0 :   for(unsigned i=0; i<size(); ++i) {
     529           0 :     if( residuenames[i]==name ) return true;
     530             :   }
     531             :   return false;
     532             : }
     533             : 
     534           0 : bool PDB::checkForAtom( const std::string& name ) const {
     535           0 :   for(unsigned i=0; i<size(); ++i) {
     536           0 :     if( atomsymb[i]==name ) return true;
     537             :   }
     538             :   return false;
     539             : }
     540             : 
     541         249 : bool PDB::checkForAtom( AtomNumber a ) const {
     542             :   const auto p=number2index.find(a);
     543         249 :   return (p!=number2index.end());
     544             : }
     545             : 
     546           0 : Log& operator<<(Log& ostr, const PDB&  pdb) {
     547             :   const std::size_t bufferlen=1000;
     548             :   char buffer[bufferlen];
     549           0 :   for(unsigned i=0; i<pdb.positions.size(); i++) {
     550           0 :     std::snprintf(buffer,bufferlen,"ATOM %3u %8.3f %8.3f %8.3f\n",pdb.numbers[i].serial(),pdb.positions[i][0],pdb.positions[i][1],pdb.positions[i][2]);
     551           0 :     ostr<<buffer;
     552             :   }
     553           0 :   return ostr;
     554             : }
     555             : 
     556        1038 : Vector PDB::getPosition(AtomNumber a)const {
     557             :   const auto p=number2index.find(a);
     558        1038 :   if(p==number2index.end()) plumed_merror("atom not available");
     559        1038 :   else return positions[p->second];
     560             : }
     561             : 
     562           0 : std::vector<std::string> PDB::getArgumentNames()const {
     563           0 :   return argnames;
     564             : }
     565             : 
     566           0 : std::string PDB::getMtype() const {
     567           0 :   return mtype;
     568             : }
     569             : 
     570           0 : void PDB::print( const double& lunits, GenericMolInfo* mymoldat, OFile& ofile, const std::string& fmt ) {
     571           0 :   if( argnames.size()>0 ) {
     572           0 :     ofile.printf("REMARK ARG=%s", argnames[0].c_str() );
     573           0 :     for(unsigned i=1; i<argnames.size(); ++i) ofile.printf(",%s",argnames[i].c_str() );
     574           0 :     ofile.printf("\n"); ofile.printf("REMARK ");
     575             :   }
     576             :   std::string descr2;
     577           0 :   if(fmt.find("-")!=std::string::npos) {
     578           0 :     descr2="%s=" + fmt + " ";
     579             :   } else {
     580             :     // This ensures numbers are left justified (i.e. next to the equals sign
     581           0 :     std::size_t psign=fmt.find("%");
     582           0 :     plumed_assert( psign!=std::string::npos );
     583           0 :     descr2="%s=%-" + fmt.substr(psign+1) + " ";
     584             :   }
     585           0 :   for(std::map<std::string,std::vector<double> >::iterator it=arg_data.begin(); it!=arg_data.end(); ++it) ofile.printf( descr2.c_str(),it->first.c_str(), it->second[0] );
     586           0 :   if( argnames.size()>0 ) ofile.printf("\n");
     587           0 :   if( !mymoldat ) {
     588           0 :     for(unsigned i=0; i<positions.size(); ++i) {
     589             :       std::array<char,6> at;
     590             :       {
     591           0 :         const char* msg = h36::hy36encode(5,numbers[i].serial(),&at[0]);
     592           0 :         plumed_assert(msg==nullptr) << msg;
     593           0 :         at[5]=0;
     594             :       }
     595             :       std::array<char,5> res;
     596             :       {
     597           0 :         const char* msg = h36::hy36encode(4,i,&res[0]);
     598           0 :         plumed_assert(msg==nullptr) << msg;
     599           0 :         res[4]=0;
     600             :       }
     601           0 :       ofile.printf("ATOM  %s  X   RES  %s    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
     602             :                    &at[0], &res[0],
     603           0 :                    lunits*positions[i][0], lunits*positions[i][1], lunits*positions[i][2],
     604             :                    occupancy[i], beta[i] );
     605             :     }
     606             :   } else {
     607           0 :     for(unsigned i=0; i<positions.size(); ++i) {
     608             :       std::array<char,6> at;
     609             :       {
     610           0 :         const char* msg = h36::hy36encode(5,numbers[i].serial(),&at[0]);
     611           0 :         plumed_assert(msg==nullptr) << msg;
     612           0 :         at[5]=0;
     613             :       }
     614             :       std::array<char,5> res;
     615             :       {
     616           0 :         const char* msg = h36::hy36encode(4,mymoldat->getResidueNumber(numbers[i]),&res[0]);
     617           0 :         plumed_assert(msg==nullptr) << msg;
     618           0 :         res[4]=0;
     619             :       }
     620           0 :       ofile.printf("ATOM  %s %-4s %3s  %s    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
     621           0 :                    &at[0], mymoldat->getAtomName(numbers[i]).c_str(),
     622           0 :                    mymoldat->getResidueName(numbers[i]).c_str(), &res[0],
     623           0 :                    lunits*positions[i][0], lunits*positions[i][1], lunits*positions[i][2],
     624             :                    occupancy[i], beta[i] );
     625             :     }
     626             :   }
     627           0 :   ofile.printf("END\n");
     628           0 : }
     629             : 
     630       59906 : bool PDB::allowedResidue( const std::string& type, const std::string& residuename ) const {
     631       59906 :   if( type=="protein" ) {
     632       59906 :     if(residuename=="ALA") return true;
     633       56756 :     else if(residuename=="ARG") return true;
     634       53172 :     else if(residuename=="ASN") return true;
     635       50100 :     else if(residuename=="ASP") return true;
     636       47412 :     else if(residuename=="CYS") return true;
     637       47204 :     else if(residuename=="GLN") return true;
     638       42844 :     else if(residuename=="GLU") return true;
     639       40828 :     else if(residuename=="GLY") return true;
     640       38036 :     else if(residuename=="HIS") return true;
     641       38036 :     else if(residuename=="ILE") return true;
     642       34436 :     else if(residuename=="LEU") return true;
     643       28052 :     else if(residuename=="LYS") return true;
     644       23732 :     else if(residuename=="MET") return true;
     645       21412 :     else if(residuename=="PHE") return true;
     646       17188 :     else if(residuename=="PRO") return true;
     647       14020 :     else if(residuename=="SER") return true;
     648       11810 :     else if(residuename=="THR") return true;
     649        9740 :     else if(residuename=="TRP") return true;
     650        9740 :     else if(residuename=="TYR") return true;
     651        7532 :     else if(residuename=="VAL") return true;
     652             : // Terminal groups
     653        3010 :     else if(residuename=="ACE") return true;
     654        3010 :     else if(residuename=="NME") return true;
     655        3010 :     else if(residuename=="NH2") return true;
     656             : // Alternative residue names in common force fields
     657        3010 :     else if(residuename=="GLH") return true; // neutral GLU
     658        3010 :     else if(residuename=="ASH") return true; // neutral ASP
     659        3010 :     else if(residuename=="HID") return true; // HIS-D amber
     660        3010 :     else if(residuename=="HSD") return true; // HIS-D charmm
     661        3010 :     else if(residuename=="HIE") return true; // HIS-E amber
     662        2402 :     else if(residuename=="HSE") return true; // HIS-E charmm
     663        2402 :     else if(residuename=="HIP") return true; // HIS-P amber
     664        2402 :     else if(residuename=="HSP") return true; // HIS-P charmm
     665        2402 :     else if(residuename=="CYX") return true; // disulfide bridge CYS
     666             : // Weird amino acids
     667        2402 :     else if(residuename=="NLE") return true;
     668        2402 :     else if(residuename=="SFO") return true;
     669        2402 :     else return false;
     670           0 :   } else if( type=="dna" ) {
     671           0 :     if(residuename=="A") return true;
     672           0 :     else if(residuename=="A5") return true;
     673           0 :     else if(residuename=="A3") return true;
     674           0 :     else if(residuename=="AN") return true;
     675           0 :     else if(residuename=="G") return true;
     676           0 :     else if(residuename=="G5") return true;
     677           0 :     else if(residuename=="G3") return true;
     678           0 :     else if(residuename=="GN") return true;
     679           0 :     else if(residuename=="T") return true;
     680           0 :     else if(residuename=="T5") return true;
     681           0 :     else if(residuename=="T3") return true;
     682           0 :     else if(residuename=="TN") return true;
     683           0 :     else if(residuename=="C") return true;
     684           0 :     else if(residuename=="C5") return true;
     685           0 :     else if(residuename=="C3") return true;
     686           0 :     else if(residuename=="CN") return true;
     687           0 :     else if(residuename=="DA") return true;
     688           0 :     else if(residuename=="DA5") return true;
     689           0 :     else if(residuename=="DA3") return true;
     690           0 :     else if(residuename=="DAN") return true;
     691           0 :     else if(residuename=="DG") return true;
     692           0 :     else if(residuename=="DG5") return true;
     693           0 :     else if(residuename=="DG3") return true;
     694           0 :     else if(residuename=="DGN") return true;
     695           0 :     else if(residuename=="DT") return true;
     696           0 :     else if(residuename=="DT5") return true;
     697           0 :     else if(residuename=="DT3") return true;
     698           0 :     else if(residuename=="DTN") return true;
     699           0 :     else if(residuename=="DC") return true;
     700           0 :     else if(residuename=="DC5") return true;
     701           0 :     else if(residuename=="DC3") return true;
     702           0 :     else if(residuename=="DCN") return true;
     703           0 :     else return false;
     704           0 :   } else if( type=="rna" ) {
     705           0 :     if(residuename=="A") return true;
     706           0 :     else if(residuename=="A5") return true;
     707           0 :     else if(residuename=="A3") return true;
     708           0 :     else if(residuename=="AN") return true;
     709           0 :     else if(residuename=="G") return true;
     710           0 :     else if(residuename=="G5") return true;
     711           0 :     else if(residuename=="G3") return true;
     712           0 :     else if(residuename=="GN") return true;
     713           0 :     else if(residuename=="U") return true;
     714           0 :     else if(residuename=="U5") return true;
     715           0 :     else if(residuename=="U3") return true;
     716           0 :     else if(residuename=="UN") return true;
     717           0 :     else if(residuename=="C") return true;
     718           0 :     else if(residuename=="C5") return true;
     719           0 :     else if(residuename=="C3") return true;
     720           0 :     else if(residuename=="CN") return true;
     721           0 :     else if(residuename=="RA") return true;
     722           0 :     else if(residuename=="RA5") return true;
     723           0 :     else if(residuename=="RA3") return true;
     724           0 :     else if(residuename=="RAN") return true;
     725           0 :     else if(residuename=="RG") return true;
     726           0 :     else if(residuename=="RG5") return true;
     727           0 :     else if(residuename=="RG3") return true;
     728           0 :     else if(residuename=="RGN") return true;
     729           0 :     else if(residuename=="RU") return true;
     730           0 :     else if(residuename=="RU5") return true;
     731           0 :     else if(residuename=="RU3") return true;
     732           0 :     else if(residuename=="RUN") return true;
     733           0 :     else if(residuename=="RC") return true;
     734           0 :     else if(residuename=="RC5") return true;
     735           0 :     else if(residuename=="RC3") return true;
     736           0 :     else if(residuename=="RCN") return true;
     737           0 :     else return false;
     738           0 :   } else if( type=="water" ) {
     739           0 :     if(residuename=="SOL") return true;
     740           0 :     if(residuename=="WAT") return true;
     741           0 :     return false;
     742           0 :   } else if( type=="ion" ) {
     743           0 :     if(residuename=="IB+") return true;
     744           0 :     if(residuename=="CA") return true;
     745           0 :     if(residuename=="CL") return true;
     746           0 :     if(residuename=="NA") return true;
     747           0 :     if(residuename=="MG") return true;
     748           0 :     if(residuename=="K") return true;
     749           0 :     if(residuename=="RB") return true;
     750           0 :     if(residuename=="CS") return true;
     751           0 :     if(residuename=="LI") return true;
     752           0 :     if(residuename=="ZN") return true;
     753           0 :     return false;
     754             :   }
     755             :   return false;
     756             : }
     757             : 
     758             : }
     759             : 

Generated by: LCOV version 1.16