Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2019 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "cltools/CLTool.h"
23 : #include "cltools/CLToolRegister.h"
24 : #include "tools/Tools.h"
25 : #include "tools/Pbc.h"
26 : #include "core/Value.h"
27 : #include "reference/ReferenceConfiguration.h"
28 : #include "PathReparameterization.h"
29 : #include "reference/MetricRegister.h"
30 : #include <cstdio>
31 : #include <string>
32 : #include <vector>
33 : #include <iostream>
34 :
35 : using namespace std;
36 :
37 : namespace PLMD {
38 : namespace mapping {
39 :
40 : //+PLUMEDOC TOOLS pathtools
41 : /*
42 : pathtools can be used to construct paths from pdb data
43 :
44 : The path CVs in PLUMED are curvilinear coordinates through a high dimensional vector space.
45 : Enhanced sampling calculations are ofen run using the progress along the paths and the distance from the path as CVs
46 : as this provides a convenient way of defining a reaction coordinate for a complicated process. This method is explained
47 : in the documentation for \ref PATH.
48 :
49 : The path itself is an ordered set of equally-spaced, high-dimensional frames the way in which these frames
50 : should be constructed will depend on the problem in hand. In other words, you will need to understand the reaction
51 : you wish to study in order to select a sensible set of frames to use in your path CV. This tool provides two
52 : methods that may be useful when it comes to constructing paths; namely:
53 :
54 : - A tool that takes in an initial guess path in which the frames are not equally spaced. This tool adjusts the positions
55 : of the frames in order to make them equally spaced so that they can be used as the basis for a path CV.
56 :
57 : - A tool that takes two frames as input and that allows you to return a linear path connecting these two frames. The
58 : output from this method may be useful as an initial guess path. It is arguable that a linear path rather defeats the
59 : purpose of the path CV method, however, as the whole purpose is to be able to define non-linear paths.
60 :
61 : Notice that you can use these two methods and take advantage of all the ways of measuring \ref dists that are available within
62 : PLUMED. The way you do this with each of these tools described above is explained in the example below.
63 :
64 : \par Examples
65 :
66 : The example below shows how you can take a set of unequally spaced frames from a pdb file named inpath.pdb
67 : and use pathtools to make them equally spaced so that they can be used as the basis for a path CV. The file
68 : containing this final path is named outpath.pdb.
69 :
70 : \verbatim
71 : plumed pathtools --path inpath.pdb --metric EUCLIDEAN --out outpath.pdb
72 : \endverbatim
73 :
74 : The example below shows how can create an initial linear path connecting the two pdb frames in start.pdb and
75 : end.pdb. In this case the path output to path.pdb will consist of 6 frames: the initial and final frames that
76 : were contained in start.pdb and end.pdb as well as four equally spaced frames along the vector connecting
77 : start.pdb to end.pdb.
78 :
79 : \verbatim
80 : plumed pathtools --start start.pdb --end end.pdb --nframes 4 --metric OPTIMAL --out path.pdb
81 : \endverbatim
82 :
83 : Often the idea with path cvs is to create a path connecting some initial state A to some final state B. You would
84 : in this case have representative configurations from your A and B states defined in the input files to pathtools
85 : that we have called start.pdb and end.pdb in the example above. Furthermore, it may be useful to have a few frames
86 : before your start frame and after your end frame. You can use path tools to create these extended paths as shown below.
87 : In this case the final path would now consist of 8 frames. Four of these frames would lie on the vector connecting state
88 : A to state B, there would be one frame each at start.pdb and end.pdb as well as one frame just before start.pdb and one
89 : frame just after end.pdb. All these frames would be equally spaced.
90 :
91 : \verbatim
92 : plumed pathtools --start start.pdb --end end.pdb --nframes 4 --metric OPTIMAL --out path.pdb --nframes-before-start 2 --nframes-after-end 2
93 : \endverbatim
94 :
95 : Notice also that when you reparameterise paths you must choose two frames to fix. Generally you chose to fix the states
96 : that are representative of your states A and B. By default pathtools will fix the first and last frames. You can, however,
97 : change the states to fix by taking advantage of the fixed flag as shown below.
98 :
99 : \verbatim
100 : plumed pathtools --path inpath.pdb --metric EUCLIDEAN --out outpath.pdb --fixed 2,12
101 : \endverbatim
102 :
103 : */
104 : //+ENDPLUMEDOC
105 :
106 4 : class PathTools :
107 : public CLTool
108 : {
109 : public:
110 : static void registerKeywords( Keywords& keys );
111 : explicit PathTools(const CLToolOptions& co );
112 : int main(FILE* in, FILE*out,Communicator& pc);
113 0 : string description()const {
114 0 : return "print out a description of the keywords for an action in html";
115 : }
116 : };
117 :
118 6456 : PLUMED_REGISTER_CLTOOL(PathTools,"pathtools")
119 :
120 1613 : void PathTools::registerKeywords( Keywords& keys ) {
121 1613 : CLTool::registerKeywords( keys );
122 6452 : keys.add("atoms","--start","a pdb file that contains the structure for the initial frame of your path");
123 6452 : keys.add("atoms","--end","a pdb file that contains the structure for the final frame of your path");
124 6452 : keys.add("atoms-1","--path","a pdb file that contains an initial path in which the frames are not equally spaced");
125 8065 : keys.add("compulsory","--fixed","0","the frames to fix when constructing the path using --path");
126 6452 : keys.add("compulsory","--metric","the measure to use to calculate the distance between frames");
127 6452 : keys.add("compulsory","--out","the name of the file on which to output your path");
128 8065 : keys.add("compulsory","--arg-fmt","%f","the format to use for argument values in your frames");
129 8065 : keys.add("compulsory","--tolerance","1E-4","the tolerance to use for the path reparameterization algorithm");
130 8065 : keys.add("compulsory","--nframes-before-start","1","the number of frames to include in the path before the first frame");
131 8065 : keys.add("compulsory","--nframes","1","the number of frames between the start and end frames in your path");
132 8065 : keys.add("compulsory","--nframes-after-end","1","the number of frames to put after the last frame of your path");
133 1613 : }
134 :
135 4 : PathTools::PathTools(const CLToolOptions& co ):
136 4 : CLTool(co)
137 : {
138 4 : inputdata=commandline;
139 4 : }
140 :
141 4 : int PathTools::main(FILE* in, FILE*out,Communicator& pc) {
142 :
143 8 : std::string mtype; parse("--metric",mtype);
144 8 : std::string ifilename; parse("--path",ifilename);
145 8 : std::string ofmt; parse("--arg-fmt",ofmt);
146 8 : std::string ofilename; parse("--out",ofilename);
147 4 : if( ifilename.length()>0 ) {
148 : fprintf(out,"Reparameterising path in file named %s so that all frames are equally spaced \n",ifilename.c_str() );
149 2 : FILE* fp=fopen(ifilename.c_str(),"r");
150 : bool do_read=true; std::vector<ReferenceConfiguration*> frames;
151 46 : while (do_read) {
152 44 : PDB mypdb;
153 : // Read the pdb file
154 22 : do_read=mypdb.readFromFilepointer(fp,false,0.1);
155 22 : if( do_read ) {
156 20 : ReferenceConfiguration* mymsd=metricRegister().create<ReferenceConfiguration>( mtype, mypdb );
157 20 : frames.push_back( mymsd ); mymsd->checkRead();
158 : }
159 : }
160 4 : std::vector<unsigned> fixed; parseVector("--fixed",fixed);
161 2 : if( fixed.size()==1 ) {
162 1 : if( fixed[0]!=0 ) error("input to --fixed should be two integers");
163 4 : fixed.resize(2); fixed[0]=0; fixed[1]=frames.size()-1;
164 1 : } else if( fixed.size()==2 ) {
165 2 : if( fixed[0]<0 || fixed[1]<0 || fixed[0]>(frames.size()-1) || fixed[1]>(frames.size()-1) ) {
166 0 : error("input to --fixed should be two numbers between 0 and the number of frames-1");
167 : }
168 : } else {
169 0 : error("input to --fixed should be two integers");
170 : }
171 2 : std::vector<AtomNumber> atoms; std::vector<std::string> arg_names;
172 64 : for(unsigned i=0; i<frames.size(); ++i) {
173 20 : frames[i]->getAtomRequests( atoms);
174 20 : frames[i]->getArgumentRequests( arg_names );
175 : }
176 : // Generate stuff to reparameterize
177 2 : Pbc fake_pbc; std::vector<Value*> vals;
178 6 : for(unsigned i=0; i<frames[0]->getNumberOfReferenceArguments(); ++i) {
179 8 : vals.push_back(new Value()); vals[vals.size()-1]->setNotPeriodic();
180 : }
181 : // And reparameterize
182 4 : PathReparameterization myparam( fake_pbc, vals, frames );
183 : // And make all points equally spaced
184 6 : double tol; parse("--tolerance",tol); myparam.reparameterize( fixed[0], fixed[1], tol );
185 :
186 : // Ouput data on spacings
187 : double mean=0;
188 6 : MultiValue myvpack( 1, frames[0]->getNumberOfReferenceArguments() + 3*frames[0]->getNumberOfReferencePositions() + 9 );
189 6 : ReferenceValuePack mypack( frames[0]->getNumberOfReferenceArguments(), frames[0]->getNumberOfReferencePositions(), myvpack );
190 58 : for(unsigned i=1; i<frames.size(); ++i) {
191 54 : double len = frames[i]->calc( frames[i-1]->getReferencePositions(), fake_pbc, vals, frames[i-1]->getReferenceArguments(), mypack, false );
192 : printf("FINAL DISTANCE BETWEEN FRAME %u AND %u IS %f \n",i-1,i,len );
193 18 : mean+=len;
194 : }
195 2 : printf("SUGGESTED LAMBDA PARAMETER IS THUS %f \n",2.3/mean/static_cast<double>( frames.size()-1 ) );
196 :
197 : // Delete all the frames
198 4 : OFile ofile; ofile.open(ofilename);
199 84 : for(unsigned i=0; i<frames.size(); ++i) { frames[i]->print( ofile, ofmt, 10. ); delete frames[i]; }
200 : // Delete the vals as we don't need them
201 10 : for(unsigned i=0; i<vals.size(); ++i) delete vals[i];
202 : // Return as we are done
203 2 : ofile.close(); return 0;
204 : }
205 :
206 : // Read initial frame
207 8 : std::string istart; parse("--start",istart); FILE* fp2=fopen(istart.c_str(),"r"); PDB mystartpdb;
208 2 : if( istart.length()==0 ) error("input is missing use --istart + --iend or --path");
209 2 : if( !mystartpdb.readFromFilepointer(fp2,false,0.1) ) error("could not read fila " + istart);
210 2 : ReferenceConfiguration* sframe=metricRegister().create<ReferenceConfiguration>( mtype, mystartpdb );
211 2 : fclose(fp2);
212 :
213 : // Read final frame
214 8 : std::string iend; parse("--end",iend); FILE* fp1=fopen(iend.c_str(),"r"); PDB myendpdb;
215 2 : if( iend.length()==0 ) error("input is missing using --istart + --iend or --path");
216 2 : if( !myendpdb.readFromFilepointer(fp1,false,0.1) ) error("could not read fila " + iend);
217 2 : ReferenceConfiguration* eframe=metricRegister().create<ReferenceConfiguration>( mtype, myendpdb );
218 2 : fclose(fp1);
219 :
220 : // Get atoms and arg requests
221 2 : std::vector<AtomNumber> atoms; std::vector<std::string> arg_names;
222 2 : sframe->getAtomRequests( atoms); eframe->getAtomRequests( atoms);
223 2 : sframe->getArgumentRequests( arg_names ); eframe->getArgumentRequests( arg_names );
224 :
225 : // Now read in the rest of the instructions
226 : unsigned nbefore, nbetween, nafter;
227 8 : parse("--nframes-before-start",nbefore); parse("--nframes",nbetween); parse("--nframes-after-end",nafter);
228 2 : nbetween++;
229 : fprintf(out,"Generating linear path connecting structure in file named %s to structure in file named %s \n",istart.c_str(),iend.c_str() );
230 2 : fprintf(out,"A path consisting of %u equally-spaced frames before the initial structure, %u frames between the intial and final structures "
231 : "and %u frames after the final structure will be created \n",nbefore,nbetween,nafter);
232 :
233 : // Create a vector of arguments to use for calculating displacements
234 2 : Pbc fpbc; std::vector<Value*> args;
235 6 : for(unsigned i=0; i<eframe->getNumberOfReferenceArguments(); ++i) {
236 8 : args.push_back(new Value()); args[args.size()-1]->setNotPeriodic();
237 : }
238 :
239 : // Calculate the distance between the start and the end
240 4 : MultiValue myvpack( 1, sframe->getNumberOfReferenceArguments() + 3*sframe->getNumberOfReferencePositions() + 9);
241 4 : ReferenceValuePack mypack( sframe->getNumberOfReferenceArguments(), sframe->getNumberOfReferencePositions(), myvpack );
242 2 : double pathlen = sframe->calc( eframe->getReferencePositions(), fpbc, args, eframe->getReferenceArguments(), mypack, false );
243 : // And the spacing between frames
244 2 : double delr = 1.0 / static_cast<double>( nbetween );
245 : // Calculate the vector connecting the start to the end
246 8 : Direction mydir(ReferenceConfigurationOptions("DIRECTION")); sframe->setupPCAStorage( mypack );
247 2 : mydir.setNamesAndAtomNumbers( sframe->getAbsoluteIndexes(), sframe->getArgumentNames() ); mydir.zeroDirection();
248 2 : sframe->extractDisplacementVector( eframe->getReferencePositions(), args, eframe->getReferenceArguments(), false, mydir );
249 :
250 :
251 : // Now create frames
252 : std::vector<ReferenceConfiguration*> final_path;
253 8 : Direction pos(ReferenceConfigurationOptions("DIRECTION"));
254 2 : pos.setNamesAndAtomNumbers( sframe->getAbsoluteIndexes(), sframe->getArgumentNames() );
255 6 : for(int i=0; i<nbefore; ++i) {
256 2 : pos.setDirection( sframe->getReferencePositions(), sframe->getReferenceArguments() );
257 2 : pos.displaceReferenceConfiguration( -i*delr, mydir );
258 4 : final_path.push_back( metricRegister().create<ReferenceConfiguration>(mtype) );
259 4 : final_path[final_path.size()-1]->setNamesAndAtomNumbers( sframe->getAbsoluteIndexes(), sframe->getArgumentNames() );
260 4 : final_path[final_path.size()-1]->setReferenceConfig( pos.getReferencePositions(), pos.getReferenceArguments(), sframe->getReferenceMetric() );
261 : }
262 14 : for(unsigned i=1; i<nbetween; ++i) {
263 6 : pos.setDirection( sframe->getReferencePositions(), sframe->getReferenceArguments() );
264 6 : pos.displaceReferenceConfiguration( i*delr, mydir );
265 12 : final_path.push_back( metricRegister().create<ReferenceConfiguration>(mtype) );
266 12 : final_path[final_path.size()-1]->setNamesAndAtomNumbers( sframe->getAbsoluteIndexes(), sframe->getArgumentNames() );
267 12 : final_path[final_path.size()-1]->setReferenceConfig( pos.getReferencePositions(), pos.getReferenceArguments(), sframe->getReferenceMetric() );
268 : }
269 12 : for(unsigned i=0; i<nafter; ++i) {
270 5 : pos.setDirection( eframe->getReferencePositions(), eframe->getReferenceArguments() );
271 5 : pos.displaceReferenceConfiguration( i*delr, mydir );
272 10 : final_path.push_back( metricRegister().create<ReferenceConfiguration>(mtype) );
273 10 : final_path[final_path.size()-1]->setNamesAndAtomNumbers( sframe->getAbsoluteIndexes(), sframe->getArgumentNames() );
274 10 : final_path[final_path.size()-1]->setReferenceConfig( pos.getReferencePositions(), pos.getReferenceArguments(), sframe->getReferenceMetric() );
275 : }
276 :
277 : double mean=0; printf("DISTANCE BETWEEN ORIGINAL FRAMES %f \n",pathlen);
278 37 : for(unsigned i=1; i<final_path.size(); ++i) {
279 33 : double len = final_path[i]->calc( final_path[i-1]->getReferencePositions(), fpbc, args, final_path[i-1]->getReferenceArguments(), mypack, false );
280 : printf("FINAL DISTANCE BETWEEN FRAME %u AND %u IS %f \n",i-1,i,len );
281 11 : mean+=len;
282 : }
283 2 : printf("SUGGESTED LAMBDA PARAMETER IS THUS %f \n",2.3/mean/static_cast<double>( final_path.size()-1 ) );
284 :
285 4 : OFile ofile; ofile.open(ofilename);
286 56 : for(unsigned i=0; i<final_path.size(); ++i) { final_path[i]->print( ofile, ofmt, 10. ); delete final_path[i]; }
287 : // Delete the args as we don't need them anymore
288 10 : for(unsigned i=0; i<args.size(); ++i) delete args[i];
289 2 : ofile.close(); delete sframe; delete eframe; return 0;
290 : }
291 :
292 : } // End of namespace
293 4839 : }
|