LCOV - code coverage report
Current view: top level - tools - PDB.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 238 452 52.7 %
Date: 2025-03-25 09:33:27 Functions: 27 44 61.4 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :    Copyright (c) 2011-2023 The plumed team
       3             :    (see the PEOPLE file at the root of the distribution for a list of names)
       4             : 
       5             :    See http://www.plumed.org for more information.
       6             : 
       7             :    This file is part of plumed, version 2.
       8             : 
       9             :    plumed is free software: you can redistribute it and/or modify
      10             :    it under the terms of the GNU Lesser General Public License as published by
      11             :    the Free Software Foundation, either version 3 of the License, or
      12             :    (at your option) any later version.
      13             : 
      14             :    plumed is distributed in the hope that it will be useful,
      15             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :    GNU Lesser General Public License for more details.
      18             : 
      19             :    You should have received a copy of the GNU Lesser General Public License
      20             :    along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      21             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      22             : #include "PDB.h"
      23             : #include "Tools.h"
      24             : #include "Log.h"
      25             : #include "h36.h"
      26             : #include <cstdio>
      27             : #include <iostream>
      28             : #include "core/GenericMolInfo.h"
      29             : #include "Tensor.h"
      30             : 
      31             : //+PLUMEDOC INTERNAL pdbreader
      32             : /*
      33             : PLUMED can use the PDB format in several places
      34             : 
      35             : - To read molecular structure (\ref MOLINFO).
      36             : - To read reference conformations (\ref RMSD, but also many other methods in \ref dists, \ref FIT_TO_TEMPLATE, etc).
      37             : 
      38             : The implemented PDB reader expects a file formatted correctly according to the
      39             : [PDB standard](http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html).
      40             : In particular, the following columns are read from ATOM records
      41             : \verbatim
      42             : columns | content
      43             : 1-6     | record name (ATOM or HETATM)
      44             : 7-11    | serial number of the atom (starting from 1)
      45             : 13-16   | atom name
      46             : 18-20   | residue name
      47             : 22      | chain id
      48             : 23-26   | residue number
      49             : 31-38   | x coordinate
      50             : 39-46   | y coordinate
      51             : 47-54   | z coordinate
      52             : 55-60   | occupancy
      53             : 61-66   | beta factor
      54             : \endverbatim
      55             : PLUMED parser is slightly more permissive than the official PDB format
      56             : in the fact that the format of real numbers is not fixed. In other words,
      57             : any real number that can be parsed is OK and the dot can be placed anywhere. However,
      58             : __columns are interpret strictly__. A sample PDB should look like the following
      59             : \verbatim
      60             : ATOM      2  CH3 ACE     1      12.932 -14.718  -6.016  1.00  1.00
      61             : ATOM      5  C   ACE     1      21.312  -9.928  -5.946  1.00  1.00
      62             : ATOM      9  CA  ALA     2      19.462 -11.088  -8.986  1.00  1.00
      63             : \endverbatim
      64             : 
      65             : Notice that serial numbers need not to be consecutive. In the three-line example above,
      66             : only the coordinates of three atoms are provided. This is perfectly legal and indicates PLUMED
      67             : that information about these atoms only is available. This could be both for structural
      68             : information in \ref MOLINFO, where the other atoms would have no name assigned, and for
      69             : reference structures used in \ref RMSD, where only the provided atoms would be used to compute RMSD.
      70             : 
      71             : \par Occupancy and beta factors
      72             : 
      73             : PLUMED reads also occupancy and beta factors that however are given a very special meaning.
      74             : In cases where the PDB structure is used as a reference for an alignment (that's the case
      75             : for instance in \ref RMSD and in \ref FIT_TO_TEMPLATE), the occupancy column is used
      76             : to provide the weight of each atom in the alignment. In cases where, perhaps after alignment,
      77             : the displacement between running coordinates and the provided PDB is computed, the beta factors
      78             : are used as weight for the displacement.
      79             : Since setting the weights to zero is the same as __not__ including an atom in the alignment or
      80             : displacement calculation, the two following reference files would be equivalent when used in an \ref RMSD
      81             : calculation. First file:
      82             : \verbatim
      83             : ATOM      2  CH3 ACE     1      12.932 -14.718  -6.016  1.00  1.00
      84             : ATOM      5  C   ACE     1      21.312  -9.928  -5.946  1.00  1.00
      85             : ATOM      9  CA  ALA     2      19.462 -11.088  -8.986  0.00  0.00
      86             : \endverbatim
      87             : Second file:
      88             : \verbatim
      89             : ATOM      2  CH3 ACE     1      12.932 -14.718  -6.016  1.00  1.00
      90             : ATOM      5  C   ACE     1      21.312  -9.928  -5.946  1.00  1.00
      91             : \endverbatim
      92             : However notice that many extra atoms with zero weight might slow down the calculation, so
      93             : removing lines is better than setting their weights to zero.
      94             : In addition, weights for alignment need not to be equivalent to weights for displacement.
      95             : Starting with PLUMED 2.7, if all the weights are set to zero they will be normalized to be equal to the
      96             : inverse of the number of involved atoms. This means that it will be possible to use files with
      97             : the weight columns set to zero obtaining a meaningful result. In previous PLUMED versions,
      98             : setting all weights to zero was resulting in an error instead.
      99             : 
     100             : 
     101             : \par Systems with more than 100k atoms
     102             : 
     103             : Notice that it very likely does not make any sense to compute the \ref RMSD or any other structural
     104             : deviation __using__ so many atoms. However, if the protein for which you want to compute \ref RMSD
     105             : has atoms with large serial numbers (e.g. because it is located __after__ solvent in the sorted list of atoms)
     106             : you might end up with troubles with the limitations of the PDB format. Indeed, since there are 5
     107             : columns available for atom serial number, this number cannot be larger than 99999.
     108             : In addition, providing \ref MOLINFO with names associated to atoms with a serial larger than 99999 would be impossible.
     109             : 
     110             : Since PLUMED 2.4 we allow [hybrid 36](http://cci.lbl.gov/hybrid_36/) format to be used to specify atom numbers.
     111             : This format is not particularly widespread, but has the nice feature that it provides a one-to-one mapping
     112             : between numbers up to approximately 80 millions and strings with 5 characters, plus it is backward compatible
     113             : for numbers smaller than 100000. This is not true for notations like the hex notation exported by VMD.
     114             : Using the hybrid 36 format, the ATOM records for atom ranging from 99997 to 100002 would read like these:
     115             : \verbatim
     116             : ATOM  99997  Ar      X   1      45.349  38.631  15.116  1.00  1.00
     117             : ATOM  99998  Ar      X   1      46.189  38.631  15.956  1.00  1.00
     118             : ATOM  99999  Ar      X   1      46.189  39.471  15.116  1.00  1.00
     119             : ATOM  A0000  Ar      X   1      45.349  39.471  15.956  1.00  1.00
     120             : ATOM  A0000  Ar      X   1      45.349  38.631  16.796  1.00  1.00
     121             : ATOM  A0001  Ar      X   1      46.189  38.631  17.636  1.00  1.00
     122             : \endverbatim
     123             : There are tools that can be found to translate from integers to strings and back using hybrid 36 format
     124             : (a simple python script can be found [here](https://sourceforge.net/p/cctbx/code/HEAD/tree/trunk/iotbx/pdb/hybrid_36.py)).
     125             : In addition, as of PLUMED 2.5, we provide a \ref pdbrenumber "command line tool" that can be used to renumber atoms in a PDB file.
     126             : 
     127             : */
     128             : //+ENDPLUMEDOC
     129             : 
     130             : 
     131             : namespace PLMD {
     132             : 
     133           0 : void PDB::setAtomNumbers( const std::vector<AtomNumber>& atoms ) {
     134           0 :   positions.resize( atoms.size() );
     135           0 :   occupancy.resize( atoms.size() );
     136           0 :   beta.resize( atoms.size() );
     137           0 :   numbers.resize( atoms.size() );
     138           0 :   for(unsigned i=0; i<atoms.size(); ++i) {
     139           0 :     numbers[i]=atoms[i];
     140           0 :     beta[i]=1.0;
     141           0 :     occupancy[i]=1.0;
     142             :   }
     143           0 : }
     144             : 
     145           0 : void PDB::setArgumentNames( const std::vector<std::string>& argument_names ) {
     146           0 :   argnames.resize( argument_names.size() );
     147           0 :   std::vector<double> tmp(1,0);
     148           0 :   for(unsigned i=0; i<argument_names.size(); ++i) {
     149             :     argnames[i]=argument_names[i];
     150           0 :     arg_data.insert( std::pair<std::string,std::vector<double> >( argnames[i], tmp ) );
     151             :   }
     152           0 : }
     153             : 
     154        2148 : bool PDB::getArgumentValue( const std::string& name, std::vector<double>& value ) const {
     155             :   std::map<std::string,std::vector<double> >::const_iterator it = arg_data.find(name);
     156        2148 :   if( it!=arg_data.end() ) {
     157        2148 :     if( value.size()!=it->second.size() ) {
     158             :       return false;
     159             :     }
     160        4300 :     for(unsigned i=0; i<value.size(); ++i) {
     161        2152 :       value[i] = it->second[i];
     162             :     }
     163             :     return true;
     164             :   }
     165             :   return false;
     166             : }
     167             : 
     168           0 : void PDB::setAtomPositions( const std::vector<Vector>& pos ) {
     169           0 :   plumed_assert( pos.size()==positions.size() );
     170           0 :   for(unsigned i=0; i<positions.size(); ++i) {
     171           0 :     positions[i]=pos[i];
     172             :   }
     173           0 : }
     174             : 
     175           0 : void PDB::setArgumentValue( const std::string& argname, const double& val ) {
     176             :   // First set the value of the value of the argument in the map
     177           0 :   arg_data.find(argname)->second.resize(1);
     178           0 :   arg_data.find(argname)->second[0] = val;
     179           0 : }
     180             : 
     181             : // bool PDB::hasRequiredProperties( const std::vector<std::string>& inproperties ){
     182             : //   bool hasprop=false;
     183             : //   for(unsigned i=0;i<remark.size();++i){
     184             : //       if( remark[i].find("PROPERTIES=")!=std::string::npos){ hasprop=true; break; }
     185             : //   }
     186             : //   if( !hasprop ){
     187             : //       std::string mypropstr="PROPERTIES=" + inproperties[0];
     188             : //       for(unsigned i=1;i<inproperties.size();++i) mypropstr += "," + inproperties[i];
     189             : //       remark.push_back( mypropstr );
     190             : //   }
     191             : //   // Now check that all required properties are there
     192             : //   for(unsigned i=0;i<inproperties.size();++i){
     193             : //       hasprop=false;
     194             : //       for(unsigned j=0;j<remark.size();++j){
     195             : //           if( remark[j].find(inproperties[i]+"=")!=std::string::npos){ hasprop=true; break; }
     196             : //       }
     197             : //       if( !hasprop ) return false;
     198             : //   }
     199             : //   return true;
     200             : // }
     201             : 
     202           0 : void PDB::addBlockEnd( const unsigned& end ) {
     203           0 :   block_ends.push_back( end );
     204           0 : }
     205             : 
     206           3 : unsigned PDB::getNumberOfAtomBlocks()const {
     207           3 :   return block_ends.size();
     208             : }
     209             : 
     210           6 : const std::vector<unsigned> & PDB::getAtomBlockEnds()const {
     211           6 :   return block_ends;
     212             : }
     213             : 
     214    23575320 : const std::vector<Vector> & PDB::getPositions()const {
     215    23575320 :   return positions;
     216             : }
     217             : 
     218           0 : void PDB::setPositions(const std::vector<Vector> &v ) {
     219           0 :   plumed_assert( v.size()==positions.size() );
     220           0 :   positions=v;
     221           0 : }
     222             : 
     223      153916 : const std::vector<double> & PDB::getOccupancy()const {
     224      153916 :   return occupancy;
     225             : }
     226             : 
     227      152249 : const std::vector<double> & PDB::getBeta()const {
     228      152249 :   return beta;
     229             : }
     230             : 
     231        4607 : void PDB::addRemark( std::vector<std::string>& v1 ) {
     232        4607 :   Tools::parse(v1,"TYPE",mtype);
     233        4607 :   Tools::parseVector(v1,"ARG",argnames);
     234       12385 :   for(unsigned i=0; i<v1.size(); ++i) {
     235        7778 :     if( v1[i].find("=")!=std::string::npos ) {
     236             :       std::size_t eq=v1[i].find_first_of('=');
     237        7164 :       std::string name=v1[i].substr(0,eq);
     238        7164 :       std::string sval=v1[i].substr(eq+1);
     239             :       // double val; Tools::convert( sval, val );
     240        7164 :       std::vector<std::string> words=Tools::getWords(sval,"\t\n ,");
     241        7164 :       std::vector<double> val( words.size() );
     242       14336 :       for(unsigned i=0; i<val.size(); ++i) {
     243        7172 :         Tools::convert( words[i], val[i] );
     244             :       }
     245       14328 :       arg_data.insert( std::pair<std::string,std::vector<double> >( name, val ) );
     246        7164 :     } else {
     247         614 :       flags.push_back(v1[i]);
     248             :     }
     249             :   }
     250        4607 : }
     251             : 
     252           0 : bool PDB::hasFlag( const std::string& fname ) const {
     253           0 :   for(unsigned i=0; i<flags.size(); ++i) {
     254           0 :     if( flags[i]==fname ) {
     255             :       return true;
     256             :     }
     257             :   }
     258             :   return false;
     259             : }
     260             : 
     261             : 
     262      314640 : const std::vector<AtomNumber> & PDB::getAtomNumbers()const {
     263      314640 :   return numbers;
     264             : }
     265             : 
     266           0 : const Vector & PDB::getBoxAxs()const {
     267           0 :   return BoxXYZ;
     268             : }
     269             : 
     270           0 : const Vector & PDB::getBoxAng()const {
     271           0 :   return BoxABG;
     272             : }
     273             : 
     274          12 : const Tensor & PDB::getBoxVec()const {
     275          12 :   return Box;
     276             : }
     277             : 
     278     6947345 : std::string PDB::getAtomName(AtomNumber a)const {
     279             :   const auto p=number2index.find(a);
     280     6947345 :   if(p==number2index.end()) {
     281             :     std::string num;
     282           0 :     Tools::convert( a.serial(), num );
     283           0 :     plumed_merror("Name of atom " + num + " not found" );
     284             :   } else {
     285     6947345 :     return atomsymb[p->second];
     286             :   }
     287             : }
     288             : 
     289      166232 : unsigned PDB::getResidueNumber(AtomNumber a)const {
     290             :   const auto p=number2index.find(a);
     291      166232 :   if(p==number2index.end()) {
     292             :     std::string num;
     293           0 :     Tools::convert( a.serial(), num );
     294           0 :     plumed_merror("Residue for atom " + num + " not found" );
     295             :   } else {
     296      166232 :     return residue[p->second];
     297             :   }
     298             : }
     299             : 
     300      180036 : std::string PDB::getResidueName(AtomNumber a) const {
     301             :   const auto p=number2index.find(a);
     302      180036 :   if(p==number2index.end()) {
     303             :     std::string num;
     304           0 :     Tools::convert( a.serial(), num );
     305           0 :     plumed_merror("Residue for atom " + num + " not found" );
     306             :   } else {
     307      180036 :     return residuenames[p->second];
     308             :   }
     309             : }
     310             : 
     311   243528438 : unsigned PDB::size()const {
     312   243528438 :   return positions.size();
     313             : }
     314             : 
     315        4584 : bool PDB::readFromFilepointer(FILE *fp,bool naturalUnits,double scale) {
     316             :   //cerr<<file<<endl;
     317             :   bool file_is_alive=false;
     318        4584 :   if(naturalUnits) {
     319             :     scale=1.0;
     320             :   }
     321             :   std::string line;
     322             :   fpos_t pos;
     323             :   bool between_ters=true;
     324      468132 :   while(Tools::getline(fp,line)) {
     325             :     //cerr<<line<<"\n";
     326      467624 :     fgetpos (fp,&pos);
     327     3354208 :     while(line.length()<80) {
     328     2886584 :       line.push_back(' ');
     329             :     }
     330      467624 :     std::string record=line.substr(0,6);
     331      467624 :     std::string serial=line.substr(6,5);
     332      467624 :     std::string atomname=line.substr(12,4);
     333      467624 :     std::string residuename=line.substr(17,3);
     334      467624 :     std::string chainID=line.substr(21,1);
     335      467624 :     std::string resnum=line.substr(22,4);
     336      467624 :     std::string x=line.substr(30,8);
     337      467624 :     std::string y=line.substr(38,8);
     338      467624 :     std::string z=line.substr(46,8);
     339      467624 :     std::string occ=line.substr(54,6);
     340      467624 :     std::string bet=line.substr(60,6);
     341      467624 :     std::string BoxX=line.substr(6,9);
     342      467624 :     std::string BoxY=line.substr(15,9);
     343      467624 :     std::string BoxZ=line.substr(24,9);
     344      467624 :     std::string BoxA=line.substr(33,7);
     345      467624 :     std::string BoxB=line.substr(40,7);
     346      467624 :     std::string BoxG=line.substr(47,7);
     347      467624 :     Tools::trim(record);
     348      467624 :     if(record=="TER") {
     349             :       between_ters=false;
     350         245 :       block_ends.push_back( positions.size() );
     351             :     }
     352      467624 :     if(record=="END") {
     353             :       file_is_alive=true;
     354             :       break;
     355             :     }
     356      463700 :     if(record=="ENDMDL") {
     357             :       file_is_alive=true;
     358             :       break;
     359             :     }
     360      463548 :     if(record=="REMARK") {
     361             :       std::vector<std::string> v1;
     362        9214 :       v1=Tools::getWords(line.substr(6));
     363        4607 :       addRemark( v1 );
     364        4607 :     }
     365      463548 :     if(record=="CRYST1") {
     366          80 :       Tools::convert(BoxX,BoxXYZ[0]);
     367          80 :       Tools::convert(BoxY,BoxXYZ[1]);
     368          80 :       Tools::convert(BoxZ,BoxXYZ[2]);
     369          80 :       Tools::convert(BoxA,BoxABG[0]);
     370          80 :       Tools::convert(BoxB,BoxABG[1]);
     371          80 :       Tools::convert(BoxG,BoxABG[2]);
     372          80 :       BoxXYZ*=scale;
     373          80 :       double cosA=std::cos(BoxABG[0]*pi/180.);
     374          80 :       double cosB=std::cos(BoxABG[1]*pi/180.);
     375          80 :       double cosG=std::cos(BoxABG[2]*pi/180.);
     376          80 :       double sinG=std::sin(BoxABG[2]*pi/180.);
     377         320 :       for (unsigned i=0; i<3; i++) {
     378         240 :         Box[i][0]=0.;
     379         240 :         Box[i][1]=0.;
     380         240 :         Box[i][2]=0.;
     381             :       }
     382          80 :       Box[0][0]=BoxXYZ[0];
     383          80 :       Box[1][0]=BoxXYZ[1]*cosG;
     384          80 :       Box[1][1]=BoxXYZ[1]*sinG;
     385          80 :       Box[2][0]=BoxXYZ[2]*cosB;
     386          80 :       Box[2][1]=(BoxXYZ[2]*BoxXYZ[1]*cosA-Box[2][0]*Box[1][0])/Box[1][1];
     387          80 :       Box[2][2]=std::sqrt(BoxXYZ[2]*BoxXYZ[2]-Box[2][0]*Box[2][0]-Box[2][1]*Box[2][1]);
     388             :     }
     389      469447 :     if(record=="ATOM" || record=="HETATM") {
     390             :       between_ters=true;
     391             :       AtomNumber a;
     392      457649 :       unsigned resno=0; // GB: when resnum string is not present, we set res number to zero
     393             :       double o,b;
     394      457649 :       Vector p;
     395             :       {
     396             :         int result;
     397      457649 :         auto trimmed=serial;
     398      457649 :         Tools::trim(trimmed);
     399      457711 :         while(trimmed.length()<5) {
     400         124 :           trimmed = std::string(" ") + trimmed;
     401             :         }
     402      457649 :         const char* errmsg = h36::hy36decode(5, trimmed.c_str(),trimmed.length(), &result);
     403      457649 :         if(errmsg) {
     404           0 :           std::string msg(errmsg);
     405           0 :           plumed_merror(msg);
     406             :         }
     407      457649 :         a.setSerial(result);
     408             :       }
     409             : 
     410             :       // allow skipping residue number
     411             :       {
     412      457649 :         auto trimmed=resnum;
     413      457649 :         Tools::trim(trimmed);
     414      457649 :         if(trimmed.length()>0) {
     415             :           int result;
     416      457649 :           while(trimmed.length()<4) {
     417           0 :             trimmed = std::string(" ") + trimmed;
     418             :           }
     419      457649 :           const char* errmsg = h36::hy36decode(4, trimmed.c_str(),trimmed.length(), &result);
     420      457649 :           if(errmsg) {
     421           0 :             std::string msg(errmsg);
     422           0 :             plumed_merror(msg);
     423             :           }
     424      457649 :           resno=result;
     425             :         }
     426             :       }
     427             : 
     428      457649 :       Tools::convert(occ,o);
     429      457649 :       Tools::convert(bet,b);
     430      457649 :       Tools::convert(x,p[0]);
     431      457649 :       Tools::convert(y,p[1]);
     432      457649 :       Tools::convert(z,p[2]);
     433             :       // scale into nm
     434      457649 :       p*=scale;
     435      457649 :       numbers.push_back(a);
     436      457649 :       number2index[a]=positions.size();
     437      457649 :       std::size_t startpos=atomname.find_first_not_of(" \t");
     438      457649 :       std::size_t endpos=atomname.find_last_not_of(" \t");
     439      457649 :       atomsymb.push_back( atomname.substr(startpos, endpos-startpos+1) );
     440      457649 :       residue.push_back(resno);
     441      457649 :       chain.push_back(chainID);
     442      457649 :       occupancy.push_back(o);
     443      457649 :       beta.push_back(b);
     444      457649 :       positions.push_back(p);
     445      457649 :       residuenames.push_back(residuename);
     446             :     }
     447             :   }
     448        4584 :   if( between_ters ) {
     449        4392 :     block_ends.push_back( positions.size() );
     450             :   }
     451        4584 :   return file_is_alive;
     452             : }
     453             : 
     454         458 : bool PDB::read(const std::string&file,bool naturalUnits,double scale) {
     455         458 :   FILE* fp=std::fopen(file.c_str(),"r");
     456         458 :   if(!fp) {
     457             :     return false;
     458             :   }
     459             : // call fclose when exiting this function
     460             :   auto deleter=[](auto f) {
     461         457 :     std::fclose(f);
     462             :   };
     463             :   std::unique_ptr<FILE,decltype(deleter)> fp_deleter(fp,deleter);
     464         457 :   readFromFilepointer(fp,naturalUnits,scale);
     465             :   return true;
     466             : }
     467             : 
     468         296 : void PDB::getChainNames( std::vector<std::string>& chains ) const {
     469         296 :   chains.resize(0);
     470         296 :   chains.push_back( chain[0] );
     471      574777 :   for(unsigned i=1; i<size(); ++i) {
     472      574481 :     if( chains[chains.size()-1]!=chain[i] ) {
     473         756 :       chains.push_back( chain[i] );
     474             :     }
     475             :   }
     476         296 : }
     477             : 
     478       11906 : void PDB::getResidueRange( const std::string& chainname, unsigned& res_start, unsigned& res_end, std::string& errmsg ) const {
     479             :   bool inres=false, foundchain=false;
     480    31023905 :   for(unsigned i=0; i<size(); ++i) {
     481    31011999 :     if( chain[i]==chainname ) {
     482    26714341 :       if(!inres) {
     483       11906 :         if(foundchain) {
     484           0 :           errmsg="found second start of chain named " + chainname;
     485             :         }
     486       11906 :         res_start=residue[i];
     487             :       }
     488             :       inres=true;
     489             :       foundchain=true;
     490     4297658 :     } else if( inres && chain[i]!=chainname ) {
     491             :       inres=false;
     492       11340 :       res_end=residue[i-1];
     493             :     }
     494             :   }
     495       11906 :   if(inres) {
     496         566 :     res_end=residue[size()-1];
     497             :   }
     498       11906 : }
     499             : 
     500         228 : void PDB::getAtomRange( const std::string& chainname, AtomNumber& a_start, AtomNumber& a_end, std::string& errmsg ) const {
     501             :   bool inres=false, foundchain=false;
     502      549225 :   for(unsigned i=0; i<size(); ++i) {
     503      548997 :     if( chain[i]==chainname ) {
     504      112687 :       if(!inres) {
     505         228 :         if(foundchain) {
     506           0 :           errmsg="found second start of chain named " + chainname;
     507             :         }
     508         228 :         a_start=numbers[i];
     509             :       }
     510             :       inres=true;
     511             :       foundchain=true;
     512      436310 :     } else if( inres && chain[i]!=chainname ) {
     513             :       inres=false;
     514         110 :       a_end=numbers[i-1];
     515             :     }
     516             :   }
     517         228 :   if(inres) {
     518         118 :     a_end=numbers[size()-1];
     519             :   }
     520         228 : }
     521             : 
     522        9900 : std::string PDB::getResidueName( const unsigned& resnum ) const {
     523    12195264 :   for(unsigned i=0; i<size(); ++i) {
     524    12195264 :     if( residue[i]==resnum ) {
     525        9900 :       return residuenames[i];
     526             :     }
     527             :   }
     528             :   std::string num;
     529           0 :   Tools::convert( resnum, num );
     530           0 :   plumed_merror("residue " + num + " not found" );
     531             : }
     532             : 
     533       50084 : std::string PDB::getResidueName(const unsigned& resnum,const std::string& chainid ) const {
     534    64922933 :   for(unsigned i=0; i<size(); ++i) {
     535    64973197 :     if( residue[i]==resnum && ( chainid=="*" || chain[i]==chainid) ) {
     536       50084 :       return residuenames[i];
     537             :     }
     538             :   }
     539             :   std::string num;
     540           0 :   Tools::convert( resnum, num );
     541           0 :   plumed_merror("residue " + num + " not found in chain " + chainid );
     542             : }
     543             : 
     544             : 
     545       22020 : AtomNumber PDB::getNamedAtomFromResidue( const std::string& aname, const unsigned& resnum ) const {
     546    27242772 :   for(unsigned i=0; i<size(); ++i) {
     547    27242772 :     if( residue[i]==resnum && atomsymb[i]==aname ) {
     548       22020 :       return numbers[i];
     549             :     }
     550             :   }
     551             :   std::string num;
     552           0 :   Tools::convert( resnum, num );
     553           0 :   plumed_merror("residue " + num + " does not contain an atom named " + aname );
     554             : }
     555             : 
     556        2898 : AtomNumber PDB::getNamedAtomFromResidueAndChain( const std::string& aname, const unsigned& resnum, const std::string& chainid ) const {
     557     1116492 :   for(unsigned i=0; i<size(); ++i) {
     558     1119390 :     if( residue[i]==resnum && atomsymb[i]==aname && ( chainid=="*" || chain[i]==chainid) ) {
     559        2898 :       return numbers[i];
     560             :     }
     561             :   }
     562             :   std::string num;
     563           0 :   Tools::convert( resnum, num );
     564           0 :   plumed_merror("residue " + num + " from chain " + chainid + " does not contain an atom named " + aname );
     565             : }
     566             : 
     567       35470 : std::vector<AtomNumber> PDB::getAtomsInResidue(const unsigned& resnum,const std::string& chainid)const {
     568             :   std::vector<AtomNumber> tmp;
     569    95754470 :   for(unsigned i=0; i<size(); ++i) {
     570    96261470 :     if( residue[i]==resnum && ( chainid=="*" || chain[i]==chainid) ) {
     571      542470 :       tmp.push_back(numbers[i]);
     572             :     }
     573             :   }
     574       35470 :   if(tmp.size()==0) {
     575             :     std::string num;
     576           0 :     Tools::convert( resnum, num );
     577           0 :     plumed_merror("Cannot find residue " + num + " from chain " + chainid  );
     578             :   }
     579       35470 :   return tmp;
     580             : }
     581             : 
     582           0 : std::vector<AtomNumber> PDB::getAtomsInChain(const std::string& chainid)const {
     583             :   std::vector<AtomNumber> tmp;
     584           0 :   for(unsigned i=0; i<size(); ++i) {
     585           0 :     if( chainid=="*" || chain[i]==chainid ) {
     586           0 :       tmp.push_back(numbers[i]);
     587             :     }
     588             :   }
     589           0 :   if(tmp.size()==0) {
     590           0 :     plumed_merror("Cannot find atoms from chain " + chainid  );
     591             :   }
     592           0 :   return tmp;
     593             : }
     594             : 
     595        7710 : std::string PDB::getChainID(const unsigned& resnumber) const {
     596     9481600 :   for(unsigned i=0; i<size(); ++i) {
     597     9481600 :     if(resnumber==residue[i]) {
     598        7710 :       return chain[i];
     599             :     }
     600             :   }
     601           0 :   plumed_merror("Not enough residues in pdb input file");
     602             : }
     603             : 
     604           0 : std::string PDB::getChainID(AtomNumber a) const {
     605             :   const auto p=number2index.find(a);
     606           0 :   if(p==number2index.end()) {
     607             :     std::string num;
     608           0 :     Tools::convert( a.serial(), num );
     609           0 :     plumed_merror("Chain for atom " + num + " not found" );
     610             :   }
     611           0 :   return chain[p->second];
     612             : }
     613             : 
     614           0 : bool PDB::checkForResidue( const std::string& name ) const {
     615           0 :   for(unsigned i=0; i<size(); ++i) {
     616           0 :     if( residuenames[i]==name ) {
     617             :       return true;
     618             :     }
     619             :   }
     620             :   return false;
     621             : }
     622             : 
     623           0 : bool PDB::checkForAtom( const std::string& name ) const {
     624           0 :   for(unsigned i=0; i<size(); ++i) {
     625           0 :     if( atomsymb[i]==name ) {
     626             :       return true;
     627             :     }
     628             :   }
     629             :   return false;
     630             : }
     631             : 
     632         249 : bool PDB::checkForAtom( AtomNumber a ) const {
     633             :   const auto p=number2index.find(a);
     634         249 :   return (p!=number2index.end());
     635             : }
     636             : 
     637           0 : Log& operator<<(Log& ostr, const PDB&  pdb) {
     638             :   const std::size_t bufferlen=1000;
     639             :   char buffer[bufferlen];
     640           0 :   for(unsigned i=0; i<pdb.positions.size(); i++) {
     641           0 :     std::snprintf(buffer,bufferlen,"ATOM %3u %8.3f %8.3f %8.3f\n",pdb.numbers[i].serial(),pdb.positions[i][0],pdb.positions[i][1],pdb.positions[i][2]);
     642           0 :     ostr<<buffer;
     643             :   }
     644           0 :   return ostr;
     645             : }
     646             : 
     647        1038 : Vector PDB::getPosition(AtomNumber a)const {
     648             :   const auto p=number2index.find(a);
     649        1038 :   if(p==number2index.end()) {
     650           0 :     plumed_merror("atom not available");
     651             :   } else {
     652        1038 :     return positions[p->second];
     653             :   }
     654             : }
     655             : 
     656           0 : std::vector<std::string> PDB::getArgumentNames()const {
     657           0 :   return argnames;
     658             : }
     659             : 
     660           0 : std::string PDB::getMtype() const {
     661           0 :   return mtype;
     662             : }
     663             : 
     664           0 : void PDB::print( const double& lunits, GenericMolInfo* mymoldat, OFile& ofile, const std::string& fmt ) {
     665           0 :   if( argnames.size()>0 ) {
     666           0 :     ofile.printf("REMARK ARG=%s", argnames[0].c_str() );
     667           0 :     for(unsigned i=1; i<argnames.size(); ++i) {
     668           0 :       ofile.printf(",%s",argnames[i].c_str() );
     669             :     }
     670           0 :     ofile.printf("\n");
     671           0 :     ofile.printf("REMARK ");
     672             :   }
     673             :   std::string descr2;
     674           0 :   if(fmt.find("-")!=std::string::npos) {
     675           0 :     descr2="%s=" + fmt + " ";
     676             :   } else {
     677             :     // This ensures numbers are left justified (i.e. next to the equals sign
     678           0 :     std::size_t psign=fmt.find("%");
     679           0 :     plumed_assert( psign!=std::string::npos );
     680           0 :     descr2="%s=%-" + fmt.substr(psign+1) + " ";
     681             :   }
     682           0 :   for(std::map<std::string,std::vector<double> >::iterator it=arg_data.begin(); it!=arg_data.end(); ++it) {
     683           0 :     ofile.printf( descr2.c_str(),it->first.c_str(), it->second[0] );
     684             :   }
     685           0 :   if( argnames.size()>0 ) {
     686           0 :     ofile.printf("\n");
     687             :   }
     688           0 :   if( !mymoldat ) {
     689           0 :     for(unsigned i=0; i<positions.size(); ++i) {
     690             :       std::array<char,6> at;
     691             :       {
     692           0 :         const char* msg = h36::hy36encode(5,numbers[i].serial(),&at[0]);
     693           0 :         plumed_assert(msg==nullptr) << msg;
     694           0 :         at[5]=0;
     695             :       }
     696             :       std::array<char,5> res;
     697             :       {
     698           0 :         const char* msg = h36::hy36encode(4,i,&res[0]);
     699           0 :         plumed_assert(msg==nullptr) << msg;
     700           0 :         res[4]=0;
     701             :       }
     702           0 :       ofile.printf("ATOM  %s  X   RES  %s    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
     703             :                    &at[0], &res[0],
     704           0 :                    lunits*positions[i][0], lunits*positions[i][1], lunits*positions[i][2],
     705             :                    occupancy[i], beta[i] );
     706             :     }
     707             :   } else {
     708           0 :     for(unsigned i=0; i<positions.size(); ++i) {
     709             :       std::array<char,6> at;
     710             :       {
     711           0 :         const char* msg = h36::hy36encode(5,numbers[i].serial(),&at[0]);
     712           0 :         plumed_assert(msg==nullptr) << msg;
     713           0 :         at[5]=0;
     714             :       }
     715             :       std::array<char,5> res;
     716             :       {
     717           0 :         const char* msg = h36::hy36encode(4,mymoldat->getResidueNumber(numbers[i]),&res[0]);
     718           0 :         plumed_assert(msg==nullptr) << msg;
     719           0 :         res[4]=0;
     720             :       }
     721           0 :       ofile.printf("ATOM  %s %-4s %3s  %s    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
     722           0 :                    &at[0], mymoldat->getAtomName(numbers[i]).c_str(),
     723           0 :                    mymoldat->getResidueName(numbers[i]).c_str(), &res[0],
     724           0 :                    lunits*positions[i][0], lunits*positions[i][1], lunits*positions[i][2],
     725             :                    occupancy[i], beta[i] );
     726             :     }
     727             :   }
     728           0 :   ofile.printf("END\n");
     729           0 : }
     730             : 
     731       59906 : bool PDB::allowedResidue( const std::string& type, const std::string& residuename ) const {
     732       59906 :   if( type=="protein" ) {
     733       59906 :     if(residuename=="ALA") {
     734             :       return true;
     735       56756 :     } else if(residuename=="ARG") {
     736             :       return true;
     737       53172 :     } else if(residuename=="ASN") {
     738             :       return true;
     739       50100 :     } else if(residuename=="ASP") {
     740             :       return true;
     741       47412 :     } else if(residuename=="CYS") {
     742             :       return true;
     743       47204 :     } else if(residuename=="GLN") {
     744             :       return true;
     745       42844 :     } else if(residuename=="GLU") {
     746             :       return true;
     747       40828 :     } else if(residuename=="GLY") {
     748             :       return true;
     749       38036 :     } else if(residuename=="HIS") {
     750             :       return true;
     751       38036 :     } else if(residuename=="ILE") {
     752             :       return true;
     753       34436 :     } else if(residuename=="LEU") {
     754             :       return true;
     755       28052 :     } else if(residuename=="LYS") {
     756             :       return true;
     757       23732 :     } else if(residuename=="MET") {
     758             :       return true;
     759       21412 :     } else if(residuename=="PHE") {
     760             :       return true;
     761       17188 :     } else if(residuename=="PRO") {
     762             :       return true;
     763       14020 :     } else if(residuename=="SER") {
     764             :       return true;
     765       11810 :     } else if(residuename=="THR") {
     766             :       return true;
     767        9740 :     } else if(residuename=="TRP") {
     768             :       return true;
     769        9740 :     } else if(residuename=="TYR") {
     770             :       return true;
     771        7532 :     } else if(residuename=="VAL") {
     772             :       return true;
     773             :     }
     774             : // Terminal groups
     775        3010 :     else if(residuename=="ACE") {
     776             :       return true;
     777        3010 :     } else if(residuename=="NME") {
     778             :       return true;
     779        3010 :     } else if(residuename=="NH2") {
     780             :       return true;
     781             :     }
     782             : // Alternative residue names in common force fields
     783        3010 :     else if(residuename=="GLH") {
     784             :       return true;  // neutral GLU
     785        3010 :     } else if(residuename=="ASH") {
     786             :       return true;  // neutral ASP
     787        3010 :     } else if(residuename=="HID") {
     788             :       return true;  // HIS-D amber
     789        3010 :     } else if(residuename=="HSD") {
     790             :       return true;  // HIS-D charmm
     791        3010 :     } else if(residuename=="HIE") {
     792             :       return true;  // HIS-E amber
     793        2402 :     } else if(residuename=="HSE") {
     794             :       return true;  // HIS-E charmm
     795        2402 :     } else if(residuename=="HIP") {
     796             :       return true;  // HIS-P amber
     797        2402 :     } else if(residuename=="HSP") {
     798             :       return true;  // HIS-P charmm
     799        2402 :     } else if(residuename=="CYX") {
     800             :       return true;  // disulfide bridge CYS
     801             :     }
     802             : // Weird amino acids
     803        2402 :     else if(residuename=="NLE") {
     804             :       return true;
     805        2402 :     } else if(residuename=="SFO") {
     806             :       return true;
     807             :     } else {
     808        2402 :       return false;
     809             :     }
     810           0 :   } else if( type=="dna" ) {
     811           0 :     if(residuename=="A") {
     812             :       return true;
     813           0 :     } else if(residuename=="A5") {
     814             :       return true;
     815           0 :     } else if(residuename=="A3") {
     816             :       return true;
     817           0 :     } else if(residuename=="AN") {
     818             :       return true;
     819           0 :     } else if(residuename=="G") {
     820             :       return true;
     821           0 :     } else if(residuename=="G5") {
     822             :       return true;
     823           0 :     } else if(residuename=="G3") {
     824             :       return true;
     825           0 :     } else if(residuename=="GN") {
     826             :       return true;
     827           0 :     } else if(residuename=="T") {
     828             :       return true;
     829           0 :     } else if(residuename=="T5") {
     830             :       return true;
     831           0 :     } else if(residuename=="T3") {
     832             :       return true;
     833           0 :     } else if(residuename=="TN") {
     834             :       return true;
     835           0 :     } else if(residuename=="C") {
     836             :       return true;
     837           0 :     } else if(residuename=="C5") {
     838             :       return true;
     839           0 :     } else if(residuename=="C3") {
     840             :       return true;
     841           0 :     } else if(residuename=="CN") {
     842             :       return true;
     843           0 :     } else if(residuename=="DA") {
     844             :       return true;
     845           0 :     } else if(residuename=="DA5") {
     846             :       return true;
     847           0 :     } else if(residuename=="DA3") {
     848             :       return true;
     849           0 :     } else if(residuename=="DAN") {
     850             :       return true;
     851           0 :     } else if(residuename=="DG") {
     852             :       return true;
     853           0 :     } else if(residuename=="DG5") {
     854             :       return true;
     855           0 :     } else if(residuename=="DG3") {
     856             :       return true;
     857           0 :     } else if(residuename=="DGN") {
     858             :       return true;
     859           0 :     } else if(residuename=="DT") {
     860             :       return true;
     861           0 :     } else if(residuename=="DT5") {
     862             :       return true;
     863           0 :     } else if(residuename=="DT3") {
     864             :       return true;
     865           0 :     } else if(residuename=="DTN") {
     866             :       return true;
     867           0 :     } else if(residuename=="DC") {
     868             :       return true;
     869           0 :     } else if(residuename=="DC5") {
     870             :       return true;
     871           0 :     } else if(residuename=="DC3") {
     872             :       return true;
     873           0 :     } else if(residuename=="DCN") {
     874             :       return true;
     875             :     } else {
     876           0 :       return false;
     877             :     }
     878           0 :   } else if( type=="rna" ) {
     879           0 :     if(residuename=="A") {
     880             :       return true;
     881           0 :     } else if(residuename=="A5") {
     882             :       return true;
     883           0 :     } else if(residuename=="A3") {
     884             :       return true;
     885           0 :     } else if(residuename=="AN") {
     886             :       return true;
     887           0 :     } else if(residuename=="G") {
     888             :       return true;
     889           0 :     } else if(residuename=="G5") {
     890             :       return true;
     891           0 :     } else if(residuename=="G3") {
     892             :       return true;
     893           0 :     } else if(residuename=="GN") {
     894             :       return true;
     895           0 :     } else if(residuename=="U") {
     896             :       return true;
     897           0 :     } else if(residuename=="U5") {
     898             :       return true;
     899           0 :     } else if(residuename=="U3") {
     900             :       return true;
     901           0 :     } else if(residuename=="UN") {
     902             :       return true;
     903           0 :     } else if(residuename=="C") {
     904             :       return true;
     905           0 :     } else if(residuename=="C5") {
     906             :       return true;
     907           0 :     } else if(residuename=="C3") {
     908             :       return true;
     909           0 :     } else if(residuename=="CN") {
     910             :       return true;
     911           0 :     } else if(residuename=="RA") {
     912             :       return true;
     913           0 :     } else if(residuename=="RA5") {
     914             :       return true;
     915           0 :     } else if(residuename=="RA3") {
     916             :       return true;
     917           0 :     } else if(residuename=="RAN") {
     918             :       return true;
     919           0 :     } else if(residuename=="RG") {
     920             :       return true;
     921           0 :     } else if(residuename=="RG5") {
     922             :       return true;
     923           0 :     } else if(residuename=="RG3") {
     924             :       return true;
     925           0 :     } else if(residuename=="RGN") {
     926             :       return true;
     927           0 :     } else if(residuename=="RU") {
     928             :       return true;
     929           0 :     } else if(residuename=="RU5") {
     930             :       return true;
     931           0 :     } else if(residuename=="RU3") {
     932             :       return true;
     933           0 :     } else if(residuename=="RUN") {
     934             :       return true;
     935           0 :     } else if(residuename=="RC") {
     936             :       return true;
     937           0 :     } else if(residuename=="RC5") {
     938             :       return true;
     939           0 :     } else if(residuename=="RC3") {
     940             :       return true;
     941           0 :     } else if(residuename=="RCN") {
     942             :       return true;
     943             :     } else {
     944           0 :       return false;
     945             :     }
     946           0 :   } else if( type=="water" ) {
     947           0 :     if(residuename=="SOL") {
     948             :       return true;
     949             :     }
     950           0 :     if(residuename=="WAT") {
     951             :       return true;
     952             :     }
     953           0 :     return false;
     954           0 :   } else if( type=="ion" ) {
     955           0 :     if(residuename=="IB+") {
     956             :       return true;
     957             :     }
     958           0 :     if(residuename=="CA") {
     959             :       return true;
     960             :     }
     961           0 :     if(residuename=="CL") {
     962             :       return true;
     963             :     }
     964           0 :     if(residuename=="NA") {
     965             :       return true;
     966             :     }
     967           0 :     if(residuename=="MG") {
     968             :       return true;
     969             :     }
     970           0 :     if(residuename=="K") {
     971             :       return true;
     972             :     }
     973           0 :     if(residuename=="RB") {
     974             :       return true;
     975             :     }
     976           0 :     if(residuename=="CS") {
     977             :       return true;
     978             :     }
     979           0 :     if(residuename=="LI") {
     980             :       return true;
     981             :     }
     982           0 :     if(residuename=="ZN") {
     983             :       return true;
     984             :     }
     985           0 :     return false;
     986             :   }
     987             :   return false;
     988             : }
     989             : 
     990             : }
     991             : 

Generated by: LCOV version 1.16