LCOV - code coverage report
Current view: top level - molfile - readpdb.h (source / functions) Hit Total Coverage
Test: plumed test coverage (other modules) Lines: 44 135 32.6 %
Date: 2024-10-11 08:09:49 Functions: 3 8 37.5 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             : University of Illinois Open Source License
       3             : Copyright 2003 Theoretical and Computational Biophysics Group, 
       4             : All rights reserved.
       5             : 
       6             : Developed by:           Theoretical and Computational Biophysics Group
       7             :                         University of Illinois at Urbana-Champaign
       8             :                         http://www.ks.uiuc.edu/
       9             : 
      10             : Permission is hereby granted, free of charge, to any person obtaining a copy of
      11             : this software and associated documentation files (the Software), to deal with 
      12             : the Software without restriction, including without limitation the rights to 
      13             : use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
      14             : of the Software, and to permit persons to whom the Software is furnished to 
      15             : do so, subject to the following conditions:
      16             : 
      17             : Redistributions of source code must retain the above copyright notice, 
      18             : this list of conditions and the following disclaimers.
      19             : 
      20             : Redistributions in binary form must reproduce the above copyright notice, 
      21             : this list of conditions and the following disclaimers in the documentation 
      22             : and/or other materials provided with the distribution.
      23             : 
      24             : Neither the names of Theoretical and Computational Biophysics Group, 
      25             : University of Illinois at Urbana-Champaign, nor the names of its contributors 
      26             : may be used to endorse or promote products derived from this Software without 
      27             : specific prior written permission.
      28             : 
      29             : THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
      30             : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
      31             : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 
      32             : THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
      33             : OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
      34             : ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
      35             : OTHER DEALINGS WITH THE SOFTWARE.
      36             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      37             : #ifndef __PLUMED_molfile_readpdb_h
      38             : #define __PLUMED_molfile_readpdb_h
      39             : /***************************************************************************
      40             :  *cr
      41             :  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
      42             :  *cr                        University of Illinois
      43             :  *cr                         All Rights Reserved
      44             :  *cr
      45             :  ***************************************************************************/
      46             : 
      47             : /***************************************************************************
      48             :  * RCS INFORMATION:
      49             :  *
      50             :  *      $RCSfile: readpdb.h,v $
      51             :  *      $Author: johns $       $Locker:  $             $State: Exp $
      52             :  *      $Revision: 1.43 $       $Date: 2016/11/28 05:01:54 $
      53             :  *
      54             :  ***************************************************************************/
      55             : 
      56             : #ifndef READ_PDB_H
      57             : #define READ_PDB_H
      58             : 
      59             : #include <stdio.h>
      60             : #include <stdlib.h>
      61             : #include <string.h>
      62             : 
      63             : namespace PLMD{
      64             : namespace molfile{
      65             : 
      66             : #define PDB_RECORD_LENGTH   80   /* actual record size */
      67             : #define PDB_BUFFER_LENGTH   83   /* size need to buffer + CR, LF, and NUL */
      68             : 
      69             : #define VMDUSECONECTRECORDS 1
      70             : 
      71             : /*  record type defines */
      72             : enum {
      73             :   PDB_HEADER, PDB_REMARK, PDB_ATOM, PDB_CONECT, PDB_UNKNOWN, PDB_END, PDB_EOF, PDB_CRYST1
      74             : };
      75             : 
      76             : /* read the next record from the specified pdb file, and put the string found
      77             :    in the given string pointer (the caller must provide adequate (81 chars)
      78             :    buffer space); return the type of record found
      79             : */
      80      205137 : static int read_pdb_record(FILE *f, char *retStr) {
      81             :   int ch;
      82             :   char inbuf[PDB_BUFFER_LENGTH]; /* space for line + cr + lf + NUL */
      83             :   int recType = PDB_UNKNOWN;
      84             :  
      85             :   /* XXX This PDB record reading code breaks with files that use
      86             :    * Mac or DOS style line breaks with ctrl-M characters.  We need
      87             :    * to replace the use of fgets() and comparisons against \n with
      88             :    * code that properly handles the other cases.
      89             :    */
      90             :  
      91             :   /* read the next line, including any ending cr/lf char */
      92      205137 :   if (inbuf != fgets(inbuf, PDB_RECORD_LENGTH + 2, f)) {
      93          48 :     retStr[0] = '\0';
      94             :     recType = PDB_EOF;
      95             :   } else {
      96             : #if 0
      97             :     /* XXX disabled this code since \n chars are desirable in remarks */
      98             :     /* and to make the behavior consistent with webpdbplugin          */
      99             : 
     100             :     /* remove the newline character, if there is one */
     101             :     if (inbuf[strlen(inbuf)-1] == '\n')
     102             :       inbuf[strlen(inbuf)-1] = '\0';
     103             : #endif
     104             : 
     105             :     /* atom records are the most common */
     106      205089 :     if (!strncmp(inbuf, "ATOM ",  5) || !strncmp(inbuf, "HETATM", 6)) {
     107             :       /* Note that by only comparing 5 chars for "ATOM " rather than 6,     */
     108             :       /* we allow PDB files containing > 99,999 atoms generated by AMBER    */
     109             :       /* to load which would otherwise fail.  Not needed for HETATM since   */
     110             :       /* those aren't going to show up in files produced for/by MD engines. */
     111             :       recType = PDB_ATOM;
     112         471 :     } else if (!strncmp(inbuf, "CONECT", 6)) {
     113             :       recType = PDB_CONECT;
     114         471 :     } else if (!strncmp(inbuf, "REMARK", 6)) {
     115             :       recType = PDB_REMARK;
     116         377 :     } else if (!strncmp(inbuf, "CRYST1", 6)) {
     117             :       recType = PDB_CRYST1;
     118         313 :     } else if (!strncmp(inbuf, "HEADER", 6)) {
     119             :       recType = PDB_HEADER;
     120         313 :     } else if (!strncmp(inbuf, "END", 3)) {  /* very permissive */
     121             :       /* XXX we treat any "ENDxxx" record as an end, to simplify testing */
     122             :       /*     since we don't remove trailing '\n' chars                   */
     123             : 
     124             :       /* the only two legal END records are "END   " and "ENDMDL" */
     125             :       recType = PDB_END;
     126             :     } 
     127             : 
     128             : #if 0
     129             :     /* XXX disable record type checking for now */
     130             :     if (recType == PDB_ATOM || 
     131             :         recType == PDB_CONECT || 
     132             :         recType == PDB_REMARK || 
     133             :         recType == PDB_HEADER || 
     134             :         recType == PDB_CRYST1) {
     135             :       strcpy(retStr, inbuf);
     136             :     } else {
     137             :       retStr[0] = '\0';
     138             :     }
     139             : #else
     140             :     strcpy(retStr, inbuf);
     141             : #endif
     142             :   }
     143             : 
     144             :   /* read the '\r', if there was one */
     145      205137 :   ch = fgetc(f);
     146      205137 :   if (ch != '\r')
     147      205137 :     ungetc(ch, f);
     148             :   
     149      205137 :   return recType;
     150             : }
     151             : 
     152             : 
     153             : /* Extract the alpha/beta/gamma a/b/c unit cell info from a CRYST1 record */
     154          33 : static void get_pdb_cryst1(const char *record, 
     155             :                            float *alpha, float *beta, float *gamma, 
     156             :                            float *a, float *b, float *c) {
     157             :   char tmp[PDB_RECORD_LENGTH+3]; /* space for line + cr + lf + NUL */
     158             :   char ch, *s;
     159             :   memset(tmp, 0, sizeof(tmp));
     160             :   strncpy(tmp, record, PDB_RECORD_LENGTH);
     161             : 
     162          33 :   s = tmp+6 ;          ch = tmp[15]; tmp[15] = 0;
     163          33 :   *a = (float) atof(s);
     164          33 :   s = tmp+15; *s = ch; ch = tmp[24]; tmp[24] = 0;
     165          33 :   *b = (float) atof(s);
     166          33 :   s = tmp+24; *s = ch; ch = tmp[33]; tmp[33] = 0;
     167          33 :   *c = (float) atof(s);
     168          33 :   s = tmp+33; *s = ch; ch = tmp[40]; tmp[40] = 0;
     169          33 :   *alpha = (float) atof(s);
     170          33 :   s = tmp+40; *s = ch; ch = tmp[47]; tmp[47] = 0;
     171          33 :   *beta = (float) atof(s);
     172          33 :   s = tmp+47; *s = ch; ch = tmp[54]; tmp[54] = 0;
     173          33 :   *gamma = (float) atof(s);
     174          33 : }
     175             : 
     176             : 
     177             : /* Extract the x,y,z coords, occupancy, and beta from an ATOM record */
     178      104358 : static void get_pdb_coordinates(const char *record, 
     179             :                                 float *x, float *y, float *z,
     180             :                                 float *occup, float *beta) {
     181             :   char numstr[50]; /* store all fields in one array to save memset calls */
     182             :   memset(numstr, 0, sizeof(numstr));
     183             : 
     184      104358 :   if (x != NULL) {
     185      104358 :     strncpy(numstr, record + 30, 8);
     186      104358 :     *x = (float) atof(numstr);
     187             :   }
     188             : 
     189      104358 :   if (y != NULL) {
     190      104358 :     strncpy(numstr+10, record + 38, 8);
     191      104358 :     *y = (float) atof(numstr+10);
     192             :   }
     193             : 
     194      104358 :   if (z != NULL) {
     195      104358 :     strncpy(numstr+20, record + 46, 8);
     196      104358 :     *z = (float) atof(numstr+20);
     197             :   }
     198             : 
     199      104358 :   if (occup != NULL) {
     200      104358 :     strncpy(numstr+30, record + 54, 6);
     201      104358 :     *occup = (float) atof(numstr+30);
     202             :   }
     203             : 
     204      104358 :   if (beta != NULL) {
     205      104358 :     strncpy(numstr+40, record + 60, 6);
     206      104358 :     *beta = (float) atof(numstr+40);
     207             :   }
     208      104358 : }
     209             : 
     210             : 
     211             : /* remove leading and trailing spaces from PDB fields */
     212           0 : static void adjust_pdb_field_string(char *field) {
     213             :   int i, len;
     214             : 
     215           0 :   len = strlen(field);
     216           0 :   while (len > 0 && field[len-1] == ' ') {
     217           0 :     field[len-1] = '\0';
     218           0 :     len--;
     219             :   }
     220             : 
     221           0 :   while (len > 0 && field[0] == ' ') {
     222           0 :     for (i=0; i < len; i++)
     223           0 :       field[i] = field[i+1];
     224           0 :     len--;
     225             :   }
     226           0 : }
     227             : 
     228           0 : static void get_pdb_header(const char *record, char *pdbcode, char *date,
     229             :                            char *classification) {
     230           0 :   if (date != NULL) {
     231           0 :     strncpy(date, record + 50, 9);
     232           0 :     date[9] = '\0';
     233             :   }
     234             : 
     235           0 :   if (classification != NULL) {
     236           0 :     strncpy(classification, record + 10, 40);
     237           0 :     classification[40] = '\0';
     238             :   }
     239             : 
     240           0 :   if (pdbcode != NULL) {
     241           0 :     strncpy(pdbcode, record + 62, 4);
     242           0 :     pdbcode[4] = '\0';
     243           0 :     adjust_pdb_field_string(pdbcode); /* remove spaces from accession code */
     244             :   }
     245           0 : }
     246             : 
     247             : 
     248           0 : static void get_pdb_conect(const char *record, int natoms, int *idxmap,
     249             :                            int *maxbnum, int *nbonds, int **from, int **to) {
     250             :   int bondto[11], numbonds, i;
     251             : 
     252           0 :   int reclen = strlen(record);
     253           0 :   for (numbonds=0, i=0; i<11; i++) {
     254             :     char bondstr[6];
     255             :     const int fieldwidth = 5;
     256           0 :     int start = 6 + i*fieldwidth;
     257           0 :     int end = start + fieldwidth;
     258             : 
     259           0 :     if (end >= reclen)
     260             :       break;
     261             : 
     262           0 :     memcpy(bondstr, record + start, fieldwidth);
     263           0 :     bondstr[5] = '\0';
     264           0 :     if (sscanf(bondstr, "%d", &bondto[numbonds]) < 0)
     265             :       break;
     266           0 :     numbonds++; 
     267             :   }
     268             : 
     269           0 :   for (i=0; i<numbonds; i++) {
     270             :     /* only add one bond per pair, PDBs list them redundantly */ 
     271           0 :     if (bondto[i] > bondto[0]) {
     272           0 :       int newnbonds = *nbonds + 1; /* add a new bond */
     273             : 
     274             :       /* allocate more bondlist space if necessary */
     275           0 :       if (newnbonds >= *maxbnum) {
     276             :         int newmax;
     277             :         int *newfromlist, *newtolist;
     278           0 :         newmax = (newnbonds + 11) * 1.25;
     279             : 
     280           0 :         newfromlist = (int *) realloc(*from, newmax * sizeof(int));
     281           0 :         newtolist = (int *) realloc(*to, newmax * sizeof(int));
     282             : 
     283           0 :         if (newfromlist != NULL || newtolist != NULL) {
     284           0 :           *maxbnum = newmax;
     285           0 :           *from = newfromlist;
     286           0 :           *to = newtolist;
     287             :         } else {
     288             :           printf("readpdb) failed to allocate memory for bondlists\n");
     289           0 :           return; /* abort */
     290             :         }
     291             :       }
     292             : 
     293           0 :       *nbonds = newnbonds;
     294           0 :       (*from)[newnbonds-1] = idxmap[bondto[0]] + 1;
     295           0 :       (*to)[newnbonds-1] = idxmap[bondto[i]] + 1;
     296             :     }
     297             :   }
     298             : }
     299             : 
     300             : /* ATOM field format according to PDB standard v2.2
     301             :   COLUMNS        DATA TYPE       FIELD         DEFINITION
     302             : ---------------------------------------------------------------------------------
     303             :  1 -  6        Record name     "ATOM  "
     304             :  7 - 11        Integer         serial        Atom serial number.
     305             : 13 - 16        Atom            name          Atom name.
     306             : 17             Character       altLoc        Alternate location indicator.
     307             : 18 - 20        Residue name    resName       Residue name.
     308             : 22             Character       chainID       Chain identifier.
     309             : 23 - 26        Integer         resSeq        Residue sequence number.
     310             : 27             AChar           iCode         Code for insertion of residues.
     311             : 31 - 38        Real(8.3)       x             Orthogonal coordinates for X in Angstroms.
     312             : 39 - 46        Real(8.3)       y             Orthogonal coordinates for Y in Angstroms.
     313             : 47 - 54        Real(8.3)       z             Orthogonal coordinates for Z in Angstroms.
     314             : 55 - 60        Real(6.2)       occupancy     Occupancy.
     315             : 61 - 66        Real(6.2)       tempFactor    Temperature factor.
     316             : 73 - 76        LString(4)      segID         Segment identifier, left-justified.
     317             : 77 - 78        LString(2)      element       Element symbol, right-justified.
     318             : 79 - 80        LString(2)      charge        Charge on the atom.
     319             :  */
     320             : 
     321             : /* Break a pdb ATOM record into its fields.  The user must provide the
     322             :    necessary space to store the atom name, residue name, and segment name.
     323             :    Character strings will be null-terminated.
     324             : */
     325           0 : static void get_pdb_fields(const char *record, int reclength, int *serial,
     326             :                            char *name, char *resname, char *chain, 
     327             :                            char *segname, char *resid, char *insertion, 
     328             :                            char *altloc, char *elementsymbol,
     329             :                            float *x, float *y, float *z, 
     330             :                            float *occup, float *beta) {
     331             :   char serialbuf[6];
     332             : 
     333             :   /* get atom serial number */
     334           0 :   strncpy(serialbuf, record + 6, 5);
     335           0 :   serialbuf[5] = '\0';
     336           0 :   *serial = 0;
     337           0 :   sscanf(serialbuf, "%5d", serial);
     338             :   
     339             :   /* get atom name */
     340           0 :   strncpy(name, record + 12, 4);
     341           0 :   name[4] = '\0';
     342           0 :   adjust_pdb_field_string(name); /* remove spaces from the name */
     343             : 
     344             :   /* get alternate location identifier */
     345           0 :   strncpy(altloc, record + 16, 1);
     346           0 :   altloc[1] = '\0';
     347             : 
     348             :   /* get residue name */
     349           0 :   strncpy(resname, record + 17, 4);
     350           0 :   resname[4] = '\0';
     351           0 :   adjust_pdb_field_string(resname); /* remove spaces from the resname */
     352             : 
     353             :   /* get chain name */
     354           0 :   chain[0] = record[21];
     355           0 :   chain[1] = '\0';
     356             : 
     357             :   /* get residue id number */
     358           0 :   strncpy(resid, record + 22, 4);
     359           0 :   resid[4] = '\0';
     360           0 :   adjust_pdb_field_string(resid); /* remove spaces from the resid */
     361             : 
     362             :   /* get the insertion code */
     363           0 :   insertion[0] = record[26];
     364           0 :   insertion[1] = '\0';
     365             : 
     366             :   /* get x, y, and z coordinates */
     367           0 :   get_pdb_coordinates(record, x, y, z, occup, beta);
     368             : 
     369             :   /* get segment name */
     370           0 :   if (reclength >= 73) {
     371           0 :     strncpy(segname, record + 72, 4);
     372           0 :     segname[4] = '\0';
     373           0 :     adjust_pdb_field_string(segname); /* remove spaces from the segname */
     374             :   } else {
     375           0 :     segname[0] = '\0';
     376             :   }
     377             : 
     378             :   /* get the atomic element symbol */
     379           0 :   if (reclength >= 77) {
     380           0 :     strncpy(elementsymbol, record + 76, 2);
     381           0 :     elementsymbol[2] = '\0';
     382             :   } else {
     383           0 :     elementsymbol[0] = '\0';
     384             :   }
     385           0 : }  
     386             : 
     387             : 
     388             : /* Write PDB data to given file descriptor; return success. */
     389           0 : static int write_raw_pdb_record(FILE *fd, const char *recordname,
     390             :     int index,const char *atomname, const char *resname,int resid, 
     391             :     const char *insertion, const char *altloc, const char *elementsymbol,
     392             :     float x, float y, float z, float occ, float beta, 
     393             :     const char *chain, const char *segname) {
     394             :   int rc;
     395             :   char indexbuf[32];
     396             :   char residbuf[32];
     397             :   char segnamebuf[5];
     398             :   char resnamebuf[5];
     399             :   char altlocchar;
     400             : 
     401             :   /* XXX                                                          */
     402             :   /* if the atom or residue indices exceed the legal PDB spec, we */
     403             :   /* start emitting asterisks or hexadecimal strings rather than  */
     404             :   /* aborting.  This is not really legal, but is an accepted hack */
     405             :   /* among various other programs that deal with large PDB files  */
     406             :   /* If we run out of hexadecimal indices, then we just print     */
     407             :   /* asterisks.                                                   */
     408           0 :   if (index < 100000) {
     409             :     sprintf(indexbuf, "%5d", index);
     410           0 :   } else if (index < 1048576) {
     411             :     sprintf(indexbuf, "%05x", index);
     412             :   } else {
     413             :     sprintf(indexbuf, "*****");
     414             :   }
     415             : 
     416           0 :   if (resid < 10000) {
     417             :     sprintf(residbuf, "%4d", resid);
     418           0 :   } else if (resid < 65536) {
     419             :     sprintf(residbuf, "%04x", resid);
     420             :   } else { 
     421             :     sprintf(residbuf, "****");
     422             :   }
     423             : 
     424           0 :   altlocchar = altloc[0];
     425           0 :   if (altlocchar == '\0') {
     426             :     altlocchar = ' ';
     427             :   }
     428             : 
     429             :   /* make sure the segname or resname do not overflow the format */ 
     430             :   strncpy(segnamebuf,segname,4);
     431           0 :   segnamebuf[4] = '\0';
     432             :   strncpy(resnamebuf,resname,4);
     433           0 :   resnamebuf[4] = '\0';
     434             : 
     435             :  
     436           0 :   rc = fprintf(fd,
     437             :          "%-6s%5s %4s%c%-4s%c%4s%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %-4s%2s\n",
     438           0 :          recordname, indexbuf, atomname, altlocchar, resnamebuf, chain[0], 
     439           0 :          residbuf, insertion[0], x, y, z, occ, beta, segnamebuf, elementsymbol);
     440             : 
     441           0 :   return (rc > 0);
     442             : }
     443             : 
     444             : }
     445             : }
     446             : #endif
     447             : #endif

Generated by: LCOV version 1.15