Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : University of Illinois Open Source License
3 : Copyright 2003 Theoretical and Computational Biophysics Group,
4 : All rights reserved.
5 :
6 : Developed by: Theoretical and Computational Biophysics Group
7 : University of Illinois at Urbana-Champaign
8 : http://www.ks.uiuc.edu/
9 :
10 : Permission is hereby granted, free of charge, to any person obtaining a copy of
11 : this software and associated documentation files (the Software), to deal with
12 : the Software without restriction, including without limitation the rights to
13 : use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
14 : of the Software, and to permit persons to whom the Software is furnished to
15 : do so, subject to the following conditions:
16 :
17 : Redistributions of source code must retain the above copyright notice,
18 : this list of conditions and the following disclaimers.
19 :
20 : Redistributions in binary form must reproduce the above copyright notice,
21 : this list of conditions and the following disclaimers in the documentation
22 : and/or other materials provided with the distribution.
23 :
24 : Neither the names of Theoretical and Computational Biophysics Group,
25 : University of Illinois at Urbana-Champaign, nor the names of its contributors
26 : may be used to endorse or promote products derived from this Software without
27 : specific prior written permission.
28 :
29 : THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
32 : THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
33 : OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
34 : ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
35 : OTHER DEALINGS WITH THE SOFTWARE.
36 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
37 : #ifndef __PLUMED_molfile_readpdb_h
38 : #define __PLUMED_molfile_readpdb_h
39 : /***************************************************************************
40 : *cr
41 : *cr (C) Copyright 1995-2016 The Board of Trustees of the
42 : *cr University of Illinois
43 : *cr All Rights Reserved
44 : *cr
45 : ***************************************************************************/
46 :
47 : /***************************************************************************
48 : * RCS INFORMATION:
49 : *
50 : * $RCSfile: readpdb.h,v $
51 : * $Author: johns $ $Locker: $ $State: Exp $
52 : * $Revision: 1.43 $ $Date: 2016/11/28 05:01:54 $
53 : *
54 : ***************************************************************************/
55 :
56 : #ifndef READ_PDB_H
57 : #define READ_PDB_H
58 :
59 : #include <stdio.h>
60 : #include <stdlib.h>
61 : #include <string.h>
62 :
63 : namespace PLMD{
64 : namespace molfile{
65 :
66 : #define PDB_RECORD_LENGTH 80 /* actual record size */
67 : #define PDB_BUFFER_LENGTH 83 /* size need to buffer + CR, LF, and NUL */
68 :
69 : #define VMDUSECONECTRECORDS 1
70 :
71 : /* record type defines */
72 : enum {
73 : PDB_HEADER, PDB_REMARK, PDB_ATOM, PDB_CONECT, PDB_UNKNOWN, PDB_END, PDB_EOF, PDB_CRYST1
74 : };
75 :
76 : /* read the next record from the specified pdb file, and put the string found
77 : in the given string pointer (the caller must provide adequate (81 chars)
78 : buffer space); return the type of record found
79 : */
80 205137 : static int read_pdb_record(FILE *f, char *retStr) {
81 : int ch;
82 : char inbuf[PDB_BUFFER_LENGTH]; /* space for line + cr + lf + NUL */
83 : int recType = PDB_UNKNOWN;
84 :
85 : /* XXX This PDB record reading code breaks with files that use
86 : * Mac or DOS style line breaks with ctrl-M characters. We need
87 : * to replace the use of fgets() and comparisons against \n with
88 : * code that properly handles the other cases.
89 : */
90 :
91 : /* read the next line, including any ending cr/lf char */
92 205137 : if (inbuf != fgets(inbuf, PDB_RECORD_LENGTH + 2, f)) {
93 48 : retStr[0] = '\0';
94 : recType = PDB_EOF;
95 : } else {
96 : #if 0
97 : /* XXX disabled this code since \n chars are desirable in remarks */
98 : /* and to make the behavior consistent with webpdbplugin */
99 :
100 : /* remove the newline character, if there is one */
101 : if (inbuf[strlen(inbuf)-1] == '\n')
102 : inbuf[strlen(inbuf)-1] = '\0';
103 : #endif
104 :
105 : /* atom records are the most common */
106 205089 : if (!strncmp(inbuf, "ATOM ", 5) || !strncmp(inbuf, "HETATM", 6)) {
107 : /* Note that by only comparing 5 chars for "ATOM " rather than 6, */
108 : /* we allow PDB files containing > 99,999 atoms generated by AMBER */
109 : /* to load which would otherwise fail. Not needed for HETATM since */
110 : /* those aren't going to show up in files produced for/by MD engines. */
111 : recType = PDB_ATOM;
112 471 : } else if (!strncmp(inbuf, "CONECT", 6)) {
113 : recType = PDB_CONECT;
114 471 : } else if (!strncmp(inbuf, "REMARK", 6)) {
115 : recType = PDB_REMARK;
116 377 : } else if (!strncmp(inbuf, "CRYST1", 6)) {
117 : recType = PDB_CRYST1;
118 313 : } else if (!strncmp(inbuf, "HEADER", 6)) {
119 : recType = PDB_HEADER;
120 313 : } else if (!strncmp(inbuf, "END", 3)) { /* very permissive */
121 : /* XXX we treat any "ENDxxx" record as an end, to simplify testing */
122 : /* since we don't remove trailing '\n' chars */
123 :
124 : /* the only two legal END records are "END " and "ENDMDL" */
125 : recType = PDB_END;
126 : }
127 :
128 : #if 0
129 : /* XXX disable record type checking for now */
130 : if (recType == PDB_ATOM ||
131 : recType == PDB_CONECT ||
132 : recType == PDB_REMARK ||
133 : recType == PDB_HEADER ||
134 : recType == PDB_CRYST1) {
135 : strcpy(retStr, inbuf);
136 : } else {
137 : retStr[0] = '\0';
138 : }
139 : #else
140 : strcpy(retStr, inbuf);
141 : #endif
142 : }
143 :
144 : /* read the '\r', if there was one */
145 205137 : ch = fgetc(f);
146 205137 : if (ch != '\r')
147 205137 : ungetc(ch, f);
148 :
149 205137 : return recType;
150 : }
151 :
152 :
153 : /* Extract the alpha/beta/gamma a/b/c unit cell info from a CRYST1 record */
154 33 : static void get_pdb_cryst1(const char *record,
155 : float *alpha, float *beta, float *gamma,
156 : float *a, float *b, float *c) {
157 : char tmp[PDB_RECORD_LENGTH+3]; /* space for line + cr + lf + NUL */
158 : char ch, *s;
159 : memset(tmp, 0, sizeof(tmp));
160 : strncpy(tmp, record, PDB_RECORD_LENGTH);
161 :
162 33 : s = tmp+6 ; ch = tmp[15]; tmp[15] = 0;
163 33 : *a = (float) atof(s);
164 33 : s = tmp+15; *s = ch; ch = tmp[24]; tmp[24] = 0;
165 33 : *b = (float) atof(s);
166 33 : s = tmp+24; *s = ch; ch = tmp[33]; tmp[33] = 0;
167 33 : *c = (float) atof(s);
168 33 : s = tmp+33; *s = ch; ch = tmp[40]; tmp[40] = 0;
169 33 : *alpha = (float) atof(s);
170 33 : s = tmp+40; *s = ch; ch = tmp[47]; tmp[47] = 0;
171 33 : *beta = (float) atof(s);
172 33 : s = tmp+47; *s = ch; ch = tmp[54]; tmp[54] = 0;
173 33 : *gamma = (float) atof(s);
174 33 : }
175 :
176 :
177 : /* Extract the x,y,z coords, occupancy, and beta from an ATOM record */
178 104358 : static void get_pdb_coordinates(const char *record,
179 : float *x, float *y, float *z,
180 : float *occup, float *beta) {
181 : char numstr[50]; /* store all fields in one array to save memset calls */
182 : memset(numstr, 0, sizeof(numstr));
183 :
184 104358 : if (x != NULL) {
185 104358 : strncpy(numstr, record + 30, 8);
186 104358 : *x = (float) atof(numstr);
187 : }
188 :
189 104358 : if (y != NULL) {
190 104358 : strncpy(numstr+10, record + 38, 8);
191 104358 : *y = (float) atof(numstr+10);
192 : }
193 :
194 104358 : if (z != NULL) {
195 104358 : strncpy(numstr+20, record + 46, 8);
196 104358 : *z = (float) atof(numstr+20);
197 : }
198 :
199 104358 : if (occup != NULL) {
200 104358 : strncpy(numstr+30, record + 54, 6);
201 104358 : *occup = (float) atof(numstr+30);
202 : }
203 :
204 104358 : if (beta != NULL) {
205 104358 : strncpy(numstr+40, record + 60, 6);
206 104358 : *beta = (float) atof(numstr+40);
207 : }
208 104358 : }
209 :
210 :
211 : /* remove leading and trailing spaces from PDB fields */
212 0 : static void adjust_pdb_field_string(char *field) {
213 : int i, len;
214 :
215 0 : len = strlen(field);
216 0 : while (len > 0 && field[len-1] == ' ') {
217 0 : field[len-1] = '\0';
218 0 : len--;
219 : }
220 :
221 0 : while (len > 0 && field[0] == ' ') {
222 0 : for (i=0; i < len; i++)
223 0 : field[i] = field[i+1];
224 0 : len--;
225 : }
226 0 : }
227 :
228 0 : static void get_pdb_header(const char *record, char *pdbcode, char *date,
229 : char *classification) {
230 0 : if (date != NULL) {
231 0 : strncpy(date, record + 50, 9);
232 0 : date[9] = '\0';
233 : }
234 :
235 0 : if (classification != NULL) {
236 0 : strncpy(classification, record + 10, 40);
237 0 : classification[40] = '\0';
238 : }
239 :
240 0 : if (pdbcode != NULL) {
241 0 : strncpy(pdbcode, record + 62, 4);
242 0 : pdbcode[4] = '\0';
243 0 : adjust_pdb_field_string(pdbcode); /* remove spaces from accession code */
244 : }
245 0 : }
246 :
247 :
248 0 : static void get_pdb_conect(const char *record, int natoms, int *idxmap,
249 : int *maxbnum, int *nbonds, int **from, int **to) {
250 : int bondto[11], numbonds, i;
251 :
252 0 : int reclen = strlen(record);
253 0 : for (numbonds=0, i=0; i<11; i++) {
254 : char bondstr[6];
255 : const int fieldwidth = 5;
256 0 : int start = 6 + i*fieldwidth;
257 0 : int end = start + fieldwidth;
258 :
259 0 : if (end >= reclen)
260 : break;
261 :
262 0 : memcpy(bondstr, record + start, fieldwidth);
263 0 : bondstr[5] = '\0';
264 0 : if (sscanf(bondstr, "%d", &bondto[numbonds]) < 0)
265 : break;
266 0 : numbonds++;
267 : }
268 :
269 0 : for (i=0; i<numbonds; i++) {
270 : /* only add one bond per pair, PDBs list them redundantly */
271 0 : if (bondto[i] > bondto[0]) {
272 0 : int newnbonds = *nbonds + 1; /* add a new bond */
273 :
274 : /* allocate more bondlist space if necessary */
275 0 : if (newnbonds >= *maxbnum) {
276 : int newmax;
277 : int *newfromlist, *newtolist;
278 0 : newmax = (newnbonds + 11) * 1.25;
279 :
280 0 : newfromlist = (int *) realloc(*from, newmax * sizeof(int));
281 0 : newtolist = (int *) realloc(*to, newmax * sizeof(int));
282 :
283 0 : if (newfromlist != NULL || newtolist != NULL) {
284 0 : *maxbnum = newmax;
285 0 : *from = newfromlist;
286 0 : *to = newtolist;
287 : } else {
288 : printf("readpdb) failed to allocate memory for bondlists\n");
289 0 : return; /* abort */
290 : }
291 : }
292 :
293 0 : *nbonds = newnbonds;
294 0 : (*from)[newnbonds-1] = idxmap[bondto[0]] + 1;
295 0 : (*to)[newnbonds-1] = idxmap[bondto[i]] + 1;
296 : }
297 : }
298 : }
299 :
300 : /* ATOM field format according to PDB standard v2.2
301 : COLUMNS DATA TYPE FIELD DEFINITION
302 : ---------------------------------------------------------------------------------
303 : 1 - 6 Record name "ATOM "
304 : 7 - 11 Integer serial Atom serial number.
305 : 13 - 16 Atom name Atom name.
306 : 17 Character altLoc Alternate location indicator.
307 : 18 - 20 Residue name resName Residue name.
308 : 22 Character chainID Chain identifier.
309 : 23 - 26 Integer resSeq Residue sequence number.
310 : 27 AChar iCode Code for insertion of residues.
311 : 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms.
312 : 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
313 : 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
314 : 55 - 60 Real(6.2) occupancy Occupancy.
315 : 61 - 66 Real(6.2) tempFactor Temperature factor.
316 : 73 - 76 LString(4) segID Segment identifier, left-justified.
317 : 77 - 78 LString(2) element Element symbol, right-justified.
318 : 79 - 80 LString(2) charge Charge on the atom.
319 : */
320 :
321 : /* Break a pdb ATOM record into its fields. The user must provide the
322 : necessary space to store the atom name, residue name, and segment name.
323 : Character strings will be null-terminated.
324 : */
325 0 : static void get_pdb_fields(const char *record, int reclength, int *serial,
326 : char *name, char *resname, char *chain,
327 : char *segname, char *resid, char *insertion,
328 : char *altloc, char *elementsymbol,
329 : float *x, float *y, float *z,
330 : float *occup, float *beta) {
331 : char serialbuf[6];
332 :
333 : /* get atom serial number */
334 0 : strncpy(serialbuf, record + 6, 5);
335 0 : serialbuf[5] = '\0';
336 0 : *serial = 0;
337 0 : sscanf(serialbuf, "%5d", serial);
338 :
339 : /* get atom name */
340 0 : strncpy(name, record + 12, 4);
341 0 : name[4] = '\0';
342 0 : adjust_pdb_field_string(name); /* remove spaces from the name */
343 :
344 : /* get alternate location identifier */
345 0 : strncpy(altloc, record + 16, 1);
346 0 : altloc[1] = '\0';
347 :
348 : /* get residue name */
349 0 : strncpy(resname, record + 17, 4);
350 0 : resname[4] = '\0';
351 0 : adjust_pdb_field_string(resname); /* remove spaces from the resname */
352 :
353 : /* get chain name */
354 0 : chain[0] = record[21];
355 0 : chain[1] = '\0';
356 :
357 : /* get residue id number */
358 0 : strncpy(resid, record + 22, 4);
359 0 : resid[4] = '\0';
360 0 : adjust_pdb_field_string(resid); /* remove spaces from the resid */
361 :
362 : /* get the insertion code */
363 0 : insertion[0] = record[26];
364 0 : insertion[1] = '\0';
365 :
366 : /* get x, y, and z coordinates */
367 0 : get_pdb_coordinates(record, x, y, z, occup, beta);
368 :
369 : /* get segment name */
370 0 : if (reclength >= 73) {
371 0 : strncpy(segname, record + 72, 4);
372 0 : segname[4] = '\0';
373 0 : adjust_pdb_field_string(segname); /* remove spaces from the segname */
374 : } else {
375 0 : segname[0] = '\0';
376 : }
377 :
378 : /* get the atomic element symbol */
379 0 : if (reclength >= 77) {
380 0 : strncpy(elementsymbol, record + 76, 2);
381 0 : elementsymbol[2] = '\0';
382 : } else {
383 0 : elementsymbol[0] = '\0';
384 : }
385 0 : }
386 :
387 :
388 : /* Write PDB data to given file descriptor; return success. */
389 0 : static int write_raw_pdb_record(FILE *fd, const char *recordname,
390 : int index,const char *atomname, const char *resname,int resid,
391 : const char *insertion, const char *altloc, const char *elementsymbol,
392 : float x, float y, float z, float occ, float beta,
393 : const char *chain, const char *segname) {
394 : int rc;
395 : char indexbuf[32];
396 : char residbuf[32];
397 : char segnamebuf[5];
398 : char resnamebuf[5];
399 : char altlocchar;
400 :
401 : /* XXX */
402 : /* if the atom or residue indices exceed the legal PDB spec, we */
403 : /* start emitting asterisks or hexadecimal strings rather than */
404 : /* aborting. This is not really legal, but is an accepted hack */
405 : /* among various other programs that deal with large PDB files */
406 : /* If we run out of hexadecimal indices, then we just print */
407 : /* asterisks. */
408 0 : if (index < 100000) {
409 : sprintf(indexbuf, "%5d", index);
410 0 : } else if (index < 1048576) {
411 : sprintf(indexbuf, "%05x", index);
412 : } else {
413 : sprintf(indexbuf, "*****");
414 : }
415 :
416 0 : if (resid < 10000) {
417 : sprintf(residbuf, "%4d", resid);
418 0 : } else if (resid < 65536) {
419 : sprintf(residbuf, "%04x", resid);
420 : } else {
421 : sprintf(residbuf, "****");
422 : }
423 :
424 0 : altlocchar = altloc[0];
425 0 : if (altlocchar == '\0') {
426 : altlocchar = ' ';
427 : }
428 :
429 : /* make sure the segname or resname do not overflow the format */
430 : strncpy(segnamebuf,segname,4);
431 0 : segnamebuf[4] = '\0';
432 : strncpy(resnamebuf,resname,4);
433 0 : resnamebuf[4] = '\0';
434 :
435 :
436 0 : rc = fprintf(fd,
437 : "%-6s%5s %4s%c%-4s%c%4s%c %8.3f%8.3f%8.3f%6.2f%6.2f %-4s%2s\n",
438 0 : recordname, indexbuf, atomname, altlocchar, resnamebuf, chain[0],
439 0 : residbuf, insertion[0], x, y, z, occ, beta, segnamebuf, elementsymbol);
440 :
441 0 : return (rc > 0);
442 : }
443 :
444 : }
445 : }
446 : #endif
447 : #endif
|