Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2016-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "cltools/CLTool.h"
23 : #include "cltools/CLToolRegister.h"
24 : #include "tools/Tools.h"
25 : #include "tools/Pbc.h"
26 : #include "core/Value.h"
27 : #include "reference/ReferenceConfiguration.h"
28 : #include "PathReparameterization.h"
29 : #include "reference/MetricRegister.h"
30 : #include <cstdio>
31 : #include <string>
32 : #include <vector>
33 : #include <iostream>
34 :
35 : namespace PLMD {
36 : namespace mapping {
37 :
38 : //+PLUMEDOC TOOLS pathtools
39 : /*
40 : pathtools can be used to construct paths from pdb data
41 :
42 : The path CVs in PLUMED are curvilinear coordinates through a high dimensional vector space.
43 : Enhanced sampling calculations are often run using the progress along the paths and the distance from the path as CVs
44 : as this provides a convenient way of defining a reaction coordinate for a complicated process. This method is explained
45 : in the documentation for \ref PATH.
46 :
47 : The path itself is an ordered set of equally-spaced, high-dimensional frames the way in which these frames
48 : should be constructed will depend on the problem in hand. In other words, you will need to understand the reaction
49 : you wish to study in order to select a sensible set of frames to use in your path CV. This tool provides two
50 : methods that may be useful when it comes to constructing paths; namely:
51 :
52 : - A tool that takes in an initial guess path in which the frames are not equally spaced. This tool adjusts the positions
53 : of the frames in order to make them equally spaced so that they can be used as the basis for a path CV.
54 :
55 : - A tool that takes two frames as input and that allows you to return a linear path connecting these two frames. The
56 : output from this method may be useful as an initial guess path. It is arguable that a linear path rather defeats the
57 : purpose of the path CV method, however, as the whole purpose is to be able to define non-linear paths.
58 :
59 : Notice that you can use these two methods and take advantage of all the ways of measuring \ref dists that are available within
60 : PLUMED. The way you do this with each of these tools described above is explained in the example below.
61 :
62 : \par Examples
63 :
64 : The example below shows how you can take a set of unequally spaced frames from a pdb file named in_path.pdb
65 : and use pathtools to make them equally spaced so that they can be used as the basis for a path CV. The file
66 : containing this final path is named final_path.pdb.
67 :
68 : \verbatim
69 : plumed pathtools --path in_path.pdb --metric EUCLIDEAN --out final_path.pdb
70 : \endverbatim
71 :
72 : The example below shows how can create an initial linear path connecting the two pdb frames in start.pdb and
73 : end.pdb. In this case the path output to path.pdb will consist of 6 frames: the initial and final frames that
74 : were contained in start.pdb and end.pdb as well as four equally spaced frames along the vector connecting
75 : start.pdb to end.pdb.
76 :
77 : \verbatim
78 : plumed pathtools --start start.pdb --end end.pdb --nframes 4 --metric OPTIMAL --out path.pdb
79 : \endverbatim
80 :
81 : Often the idea with path collective variables is to create a path connecting some initial state A to some final state B. You would
82 : in this case have representative configurations from your A and B states defined in the input files to pathtools
83 : that we have called start.pdb and end.pdb in the example above. Furthermore, it may be useful to have a few frames
84 : before your start frame and after your end frame. You can use path tools to create these extended paths as shown below.
85 : In this case the final path would now consist of 8 frames. Four of these frames would lie on the vector connecting state
86 : A to state B, there would be one frame each at start.pdb and end.pdb as well as one frame just before start.pdb and one
87 : frame just after end.pdb. All these frames would be equally spaced.
88 :
89 : \verbatim
90 : plumed pathtools --start start.pdb --end end.pdb --nframes 4 --metric OPTIMAL --out path.pdb --nframes-before-start 2 --nframes-after-end 2
91 : \endverbatim
92 :
93 : Notice also that when you re-parameterize paths you must choose two frames to fix. Generally you chose to fix the states
94 : that are representative of your states A and B. By default pathtools will fix the first and last frames. You can, however,
95 : change the states to fix by taking advantage of the fixed flag as shown below.
96 :
97 : \verbatim
98 : plumed pathtools --path inpath.pdb --metric EUCLIDEAN --out outpath.pdb --fixed 2,12
99 : \endverbatim
100 :
101 : */
102 : //+ENDPLUMEDOC
103 :
104 : class PathTools :
105 : public CLTool
106 : {
107 : public:
108 : static void registerKeywords( Keywords& keys );
109 : explicit PathTools(const CLToolOptions& co );
110 : int main(FILE* in, FILE*out,Communicator& pc);
111 4 : std::string description()const {
112 4 : return "print out a description of the keywords for an action in html";
113 : }
114 : };
115 :
116 10427 : PLUMED_REGISTER_CLTOOL(PathTools,"pathtools")
117 :
118 3473 : void PathTools::registerKeywords( Keywords& keys ) {
119 3473 : CLTool::registerKeywords( keys );
120 6946 : keys.add("atoms","--start","a pdb file that contains the structure for the initial frame of your path");
121 6946 : keys.add("atoms","--end","a pdb file that contains the structure for the final frame of your path");
122 6946 : keys.add("atoms-1","--path","a pdb file that contains an initial path in which the frames are not equally spaced");
123 6946 : keys.add("compulsory","--fixed","0","the frames to fix when constructing the path using --path");
124 6946 : keys.add("compulsory","--metric","the measure to use to calculate the distance between frames");
125 6946 : keys.add("compulsory","--out","the name of the file on which to output your path");
126 6946 : keys.add("compulsory","--arg-fmt","%f","the format to use for argument values in your frames");
127 6946 : keys.add("compulsory","--tolerance","1E-4","the tolerance to use for the algorithm that is used to re-parameterize the path");
128 10419 : keys.add("compulsory","--nframes-before-start","1","the number of frames to include in the path before the first frame");
129 6946 : keys.add("compulsory","--nframes","1","the number of frames between the start and end frames in your path");
130 10419 : keys.add("compulsory","--nframes-after-end","1","the number of frames to put after the last frame of your path");
131 3473 : }
132 :
133 8 : PathTools::PathTools(const CLToolOptions& co ):
134 8 : CLTool(co)
135 : {
136 8 : inputdata=commandline;
137 8 : }
138 :
139 4 : int PathTools::main(FILE* in, FILE*out,Communicator& pc) {
140 8 : std::string mtype; parse("--metric",mtype);
141 8 : std::string ifilename; parse("--path",ifilename);
142 8 : std::string ofmt; parse("--arg-fmt",ofmt);
143 8 : std::string ofilename; parse("--out",ofilename);
144 4 : if( ifilename.length()>0 ) {
145 : std::fprintf(out,"Reparameterising path in file named %s so that all frames are equally spaced \n",ifilename.c_str() );
146 2 : FILE* fp=std::fopen(ifilename.c_str(),"r");
147 : // call fclose when fp goes out of scope
148 2 : auto deleter=[](FILE* f) { if(f) std::fclose(f); };
149 : std::unique_ptr<FILE,decltype(deleter)> fp_deleter(fp,deleter);
150 : bool do_read=true; std::vector<std::unique_ptr<ReferenceConfiguration>> frames;
151 24 : while (do_read) {
152 22 : PDB mypdb;
153 : // Read the pdb file
154 22 : do_read=mypdb.readFromFilepointer(fp,false,0.1);
155 22 : if( do_read ) {
156 20 : auto mymsd(metricRegister().create<ReferenceConfiguration>( mtype, mypdb ));
157 20 : frames.emplace_back( std::move(mymsd) );
158 20 : }
159 22 : }
160 4 : std::vector<unsigned> fixed; parseVector("--fixed",fixed);
161 2 : if( fixed.size()==1 ) {
162 1 : if( fixed[0]!=0 ) error("input to --fixed should be two integers");
163 1 : fixed.resize(2); fixed[0]=0; fixed[1]=frames.size()-1;
164 1 : } else if( fixed.size()==2 ) {
165 1 : if( fixed[0]>(frames.size()-1) || fixed[1]>(frames.size()-1) ) {
166 0 : error("input to --fixed should be two numbers between 0 and the number of frames-1");
167 : }
168 : } else {
169 0 : error("input to --fixed should be two integers");
170 : }
171 : std::vector<AtomNumber> atoms; std::vector<std::string> arg_names;
172 22 : for(unsigned i=0; i<frames.size(); ++i) {
173 : frames[i]->getAtomRequests( atoms);
174 20 : frames[i]->getArgumentRequests( arg_names );
175 : }
176 : // Generate stuff to reparameterize
177 2 : Pbc fake_pbc; std::vector<std::unique_ptr<Value>> vals;
178 4 : for(unsigned i=0; i<frames[0]->getNumberOfReferenceArguments(); ++i) {
179 2 : vals.emplace_back(Tools::make_unique<Value>()); vals[vals.size()-1]->setNotPeriodic();
180 : }
181 :
182 : // temporary pointes used to make the conversion once
183 :
184 2 : auto vals_ptr=Tools::unique2raw(vals);
185 : // And reparameterize
186 2 : PathReparameterization myparam( fake_pbc, vals_ptr, frames );
187 : // And make all points equally spaced
188 4 : double tol; parse("--tolerance",tol); myparam.reparameterize( fixed[0], fixed[1], tol );
189 :
190 : // Output data on spacings
191 : double mean=0;
192 2 : MultiValue myvpack( 1, frames[0]->getNumberOfReferenceArguments() + 3*frames[0]->getNumberOfReferencePositions() + 9 );
193 2 : ReferenceValuePack mypack( frames[0]->getNumberOfReferenceArguments(), frames[0]->getNumberOfReferencePositions(), myvpack );
194 20 : for(unsigned i=1; i<frames.size(); ++i) {
195 18 : double len = frames[i]->calc( frames[i-1]->getReferencePositions(), fake_pbc, vals_ptr, frames[i-1]->getReferenceArguments(), mypack, false );
196 : printf("FINAL DISTANCE BETWEEN FRAME %u AND %u IS %f \n",i-1,i,len );
197 18 : mean+=len*len;
198 : }
199 2 : printf("SUGGESTED LAMBDA PARAMETER IS THUS %f \n",2.3/(mean/static_cast<double>( frames.size()-1 )) );
200 :
201 : // Delete all the frames
202 2 : OFile ofile; ofile.open(ofilename);
203 2 : std::vector<std::string> argnames; frames[0]->getArgumentRequests( argnames );
204 2 : std::vector<AtomNumber> atindices; frames[0]->getAtomRequests( atindices );
205 2 : PDB mypdb; mypdb.setAtomNumbers( atindices ); mypdb.setArgumentNames( argnames );
206 22 : for(unsigned i=0; i<frames.size(); ++i) {
207 20 : mypdb.setAtomPositions( frames[i]->getReferencePositions() );
208 40 : for(unsigned j=0; j<argnames.size(); ++j) mypdb.setArgumentValue( argnames[j], frames[i]->getReferenceArguments()[j] );
209 20 : ofile.printf("REMARK TYPE=%s\n",mtype.c_str() );
210 20 : mypdb.print( 10, NULL, ofile, ofmt );
211 : }
212 : // Delete the vals as we don't need them
213 : // for(unsigned i=0; i<vals.size(); ++i) delete vals[i];
214 : // Return as we are done
215 : return 0;
216 8 : }
217 :
218 : // Read initial frame
219 4 : std::string istart; parse("--start",istart); FILE* fp2=fopen(istart.c_str(),"r"); PDB mystartpdb;
220 2 : if( istart.length()==0 ) error("input is missing use --istart + --iend or --path");
221 2 : if( !mystartpdb.readFromFilepointer(fp2,false,0.1) ) error("could not read fila " + istart);
222 2 : auto sframe=metricRegister().create<ReferenceConfiguration>( mtype, mystartpdb );
223 2 : fclose(fp2);
224 :
225 : // Read final frame
226 4 : std::string iend; parse("--end",iend); FILE* fp1=fopen(iend.c_str(),"r"); PDB myendpdb;
227 2 : if( iend.length()==0 ) error("input is missing using --istart + --iend or --path");
228 2 : if( !myendpdb.readFromFilepointer(fp1,false,0.1) ) error("could not read fila " + iend);
229 2 : auto eframe=metricRegister().create<ReferenceConfiguration>( mtype, myendpdb );
230 2 : fclose(fp1);
231 : // Get atoms and arg requests
232 : std::vector<AtomNumber> atoms; std::vector<std::string> arg_names;
233 2 : sframe->getAtomRequests( atoms); eframe->getAtomRequests( atoms);
234 2 : sframe->getArgumentRequests( arg_names ); eframe->getArgumentRequests( arg_names );
235 :
236 : // Now read in the rest of the instructions
237 : unsigned nbefore, nbetween, nafter;
238 6 : parse("--nframes-before-start",nbefore); parse("--nframes",nbetween); parse("--nframes-after-end",nafter);
239 2 : nbetween++;
240 : std::fprintf(out,"Generating linear path connecting structure in file named %s to structure in file named %s \n",istart.c_str(),iend.c_str() );
241 2 : std::fprintf(out,"A path consisting of %u equally-spaced frames before the initial structure, %u frames between the initial and final structures "
242 : "and %u frames after the final structure will be created \n",nbefore,nbetween,nafter);
243 :
244 : // Create a vector of arguments to use for calculating displacements
245 2 : Pbc fpbc;
246 : std::vector<std::unique_ptr<Value>> args;
247 4 : for(unsigned i=0; i<eframe->getNumberOfReferenceArguments(); ++i) {
248 2 : args.emplace_back(Tools::make_unique<Value>()); args[args.size()-1]->setNotPeriodic();
249 : }
250 :
251 : // convert pointer once:
252 2 : auto args_ptr=Tools::unique2raw(args);
253 :
254 : // Calculate the distance between the start and the end
255 2 : MultiValue myvpack( 1, sframe->getNumberOfReferenceArguments() + 3*sframe->getNumberOfReferencePositions() + 9);
256 2 : ReferenceValuePack mypack( sframe->getNumberOfReferenceArguments(), sframe->getNumberOfReferencePositions(), myvpack );
257 2 : sframe->calc( eframe->getReferencePositions(), fpbc, args_ptr, eframe->getReferenceArguments(), mypack, false );
258 : // And the spacing between frames
259 2 : double delr = 1.0 / static_cast<double>( nbetween );
260 : // Calculate the vector connecting the start to the end
261 2 : PDB mypdb; mypdb.setAtomNumbers( sframe->getAbsoluteIndexes() ); mypdb.addBlockEnd( sframe->getAbsoluteIndexes().size() );
262 2 : if( sframe->getArgumentNames().size()>0 ) mypdb.setArgumentNames( sframe->getArgumentNames() );
263 4 : Direction mydir(ReferenceConfigurationOptions("DIRECTION")); sframe->setupPCAStorage( mypack ); mydir.read( mypdb ); mydir.zeroDirection();
264 2 : sframe->extractDisplacementVector( eframe->getReferencePositions(), args_ptr, eframe->getReferenceArguments(), false, mydir );
265 :
266 : // Now create frames
267 2 : OFile ofile; ofile.open(ofilename); unsigned nframes=0;
268 4 : Direction pos(ReferenceConfigurationOptions("DIRECTION")); pos.read( mypdb );
269 4 : for(int i=0; i<nbefore; ++i) {
270 2 : pos.setDirection( sframe->getReferencePositions(), sframe->getReferenceArguments() );
271 2 : pos.displaceReferenceConfiguration( -i*delr, mydir );
272 2 : mypdb.setAtomPositions( pos.getReferencePositions() );
273 4 : for(unsigned j=0; j<pos.getReferenceArguments().size(); ++j) mypdb.setArgumentValue( sframe->getArgumentNames()[j], pos.getReferenceArgument(j) );
274 2 : ofile.printf("REMARK TYPE=%s\n",mtype.c_str() );
275 2 : mypdb.print( 10, NULL, ofile, ofmt ); nframes++;
276 : }
277 8 : for(unsigned i=1; i<nbetween; ++i) {
278 6 : pos.setDirection( sframe->getReferencePositions(), sframe->getReferenceArguments() );
279 6 : pos.displaceReferenceConfiguration( i*delr, mydir );
280 6 : mypdb.setAtomPositions( pos.getReferencePositions() );
281 16 : for(unsigned j=0; j<pos.getReferenceArguments().size(); ++j) mypdb.setArgumentValue( sframe->getArgumentNames()[j], pos.getReferenceArgument(j) );
282 6 : ofile.printf("REMARK TYPE=%s\n",mtype.c_str() );
283 6 : mypdb.print( 10, NULL, ofile, ofmt ); nframes++;
284 : }
285 7 : for(unsigned i=0; i<nafter; ++i) {
286 5 : pos.setDirection( eframe->getReferencePositions(), eframe->getReferenceArguments() );
287 5 : pos.displaceReferenceConfiguration( i*delr, mydir );
288 5 : mypdb.setAtomPositions( pos.getReferencePositions() );
289 13 : for(unsigned j=0; j<pos.getReferenceArguments().size(); ++j) mypdb.setArgumentValue( sframe->getArgumentNames()[j], pos.getReferenceArgument(j) );
290 5 : ofile.printf("REMARK TYPE=%s\n",mtype.c_str() );
291 5 : mypdb.print( 10, NULL, ofile, ofmt ); nframes++;
292 : }
293 :
294 2 : ofile.close(); return 0;
295 8 : }
296 :
297 : } // End of namespace
298 : }
|