Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2015-2023 The plumed team
3 : (see the PEOPLE file at the root of the distribution for a list of names)
4 :
5 : See http://www.plumed.org for more information.
6 :
7 : This file is part of plumed, version 2.
8 :
9 : plumed is free software: you can redistribute it and/or modify
10 : it under the terms of the GNU Lesser General Public License as published by
11 : the Free Software Foundation, either version 3 of the License, or
12 : (at your option) any later version.
13 :
14 : plumed is distributed in the hope that it will be useful,
15 : but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : GNU Lesser General Public License for more details.
18 :
19 : You should have received a copy of the GNU Lesser General Public License
20 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
22 : #include "core/ActionShortcut.h"
23 : #include "core/ActionRegister.h"
24 : #include "core/ActionWithValue.h"
25 : #include "core/ActionPilot.h"
26 : #include "core/PlumedMain.h"
27 : #include "core/ActionSet.h"
28 :
29 : //+PLUMEDOC LANDMARKS LANDMARK_SELECT_STRIDE
30 : /*
31 : Select every ith frame from the stored set of configurations
32 :
33 : If you have collected a set of trajectory frames using [COLLECT_FRAMES](COLLECT_FRAMES.md) you can use this action to
34 : select a subset of the configurations you have collected. This particular method for landmark selection reduces the number of frames by taking every
35 : $i$th frame. So, for example, if you use the input below, every 10th frame of the stored trajectory is transferred to the `ll_data` Value
36 : that is then output in the PDB file. This happens because we are collecting 1000 trajectory frames in total but only taking
37 : 100 landmarks from this data.
38 :
39 : ```plumed
40 : # This stores the positions of all the first 10 atoms in the system for later analysis
41 : cc: COLLECT_FRAMES ATOMS=1,2,3,4,5,6,7,8,9,10 ALIGN=OPTIMAL STRIDE=1 CLEAR=1000
42 :
43 : # Select landmarks
44 : ll: LANDMARK_SELECT_STRIDE ARG=cc NLANDMARKS=100
45 :
46 : # Output the data to a file
47 : DUMPPDB ATOMS=ll_data ATOM_INDICES=1,2,3,4,5,6,7,8,9,10 FILE=traj.pdb STRIDE=1000
48 : ```
49 :
50 : If you expand the shortcuts in the input above you will notice that the LANDMARK_SELECT_STRIDE shortcut creates a [DISSIMILARITIES](DISSIMILARITIES.md) action
51 : that calculates the distances between the input frames. We need to calculate these dissimilarities here because the LANDMARK_SELECT_STRIDE shortcut computes the
52 : weights of the landmarks by doing a [VORONOI](VORONOI.md) analysis. If you would like to turn this and the computing of dissimilarities off you can use the
53 : NODISSIMILARITIES flag. If you do not want to compute VORONOI weights you can use the NOVORONOI flag. Be aware, however, that dissimilarities are still computed
54 : if you only use the NOVORONOI flag.
55 :
56 : If you have already computed the dissimilarities between the collected frames you can pass them as input to the LANDMARK_SELECT_STRIDE shortcut as shown below:
57 :
58 : ```plumed
59 : # This stores the positions of all the first 10 atoms in the system for later analysis
60 : cc: COLLECT_FRAMES ATOMS=1,2,3,4,5,6,7,8,9,10 ALIGN=OPTIMAL STRIDE=1 CLEAR=1000
61 :
62 : # This calculates the dissimilarities between the stored frames
63 : cc_dataT: TRANSPOSE ARG=cc_data
64 : dd: DISSIMILARITIES ARG=cc_data,cc_dataT
65 :
66 : # Select landmarks
67 : ll: LANDMARK_SELECT_STRIDE ARG=cc DISSIMILARITIES=dd NLANDMARKS=100
68 :
69 : # Output the data to a file
70 : DUMPPDB ATOMS=ll_data ATOM_INDICES=1,2,3,4,5,6,7,8,9,10 FILE=traj.pdb STRIDE=1000
71 : ```
72 :
73 : Notice that you can also read in dissimilarities from a file using a [CONSTANT](CONSTANT.md) action and pass these directly to the LANDMARK_SELECT_STRIDE action and avoid using COLLECT_FRAMES.
74 :
75 : You can learn how to use landmark selection for dimensionality reduction calculations by working through [this tutorial](https://www.plumed-tutorials.org/lessons/21/006/data/DIMENSIONALITY.html)
76 :
77 : */
78 : //+ENDPLUMEDOC
79 :
80 : //+PLUMEDOC LANDMARKS LANDMARK_SELECT_RANDOM
81 : /*
82 : Select a random set of landmarks from a large set of configurations.
83 :
84 : If you have collected a set of trajectory frames using [COLLECT_FRAMES](COLLECT_FRAMES.md) you can use this action to
85 : select a subset of the configurations you have collected. This particular method for landmark selection reduces the number of frames by
86 : choosing NLANDMARKS points from the data collected by COLLECT_FRAMES at random. So, for example, if you use the input below, 100 randomly-selected
87 : points from the 1000 trajectory frames that were collected by the COLLECT_FRAMES action are transferred to the `ll_data` Value that is then output in the PDB file.
88 :
89 : ```plumed
90 : # This stores the positions of all the first 10 atoms in the system for later analysis
91 : cc: COLLECT_FRAMES ATOMS=1,2,3,4,5,6,7,8,9,10 ALIGN=OPTIMAL STRIDE=1 CLEAR=1000
92 :
93 : # Select landmarks
94 : ll: LANDMARK_SELECT_RANDOM ARG=cc NLANDMARKS=100
95 :
96 : # Output the data to a file
97 : DUMPPDB ATOMS=ll_data ATOM_INDICES=1,2,3,4,5,6,7,8,9,10 FILE=traj.pdb STRIDE=1000
98 : ```
99 :
100 : If you expand the shortcuts in the input above you will notice that the LANDMARK_SELECT_RANDOM shortcut creates a [DISSIMILARITIES](DISSIMILARITIES.md) action
101 : that calculates the distances between the input frames. We need to calculate these dissimilarities here because the LANDMARK_SELECT_RANDOM shortcut computes the
102 : weights of the landmarks by doing a [VORONOI](VORONOI.md) analysis. If you would like to turn this and the computing of dissimilarities off you can use the
103 : NODISSIMILARITIES flag. If you do not want to compute VORONOI weights you can use the NOVORONOI flag. Be aware, however, that dissimilarities are still computed
104 : if you only use the NOVORONOI flag.
105 :
106 : If you have already computed the dissimilarities between the collected frames you can pass them as input to the LANDMARK_SELECT_RANDOM shortcut as shown below:
107 :
108 : ```plumed
109 : # This stores the positions of all the first 10 atoms in the system for later analysis
110 : cc: COLLECT_FRAMES ATOMS=1,2,3,4,5,6,7,8,9,10 ALIGN=OPTIMAL STRIDE=1 CLEAR=1000
111 :
112 : # This calculates the dissimilarities between the stored frames
113 : cc_dataT: TRANSPOSE ARG=cc_data
114 : dd: DISSIMILARITIES ARG=cc_data,cc_dataT
115 :
116 : # Select landmarks
117 : ll: LANDMARK_SELECT_RANDOM ARG=cc DISSIMILARITIES=dd NLANDMARKS=100
118 :
119 : # Output the data to a file
120 : DUMPPDB ATOMS=ll_data ATOM_INDICES=1,2,3,4,5,6,7,8,9,10 FILE=traj.pdb STRIDE=1000
121 : ```
122 :
123 : Notice that you can also read in dissimilarities from a file using a [CONSTANT](CONSTANT.md) action and pass these directly to the LANDMARK_SELECT_RANDOM shortcut and avoid using COLLECT_FRAMES.
124 :
125 : You can learn how to use landmark selection for dimensionality reduction calculations by working through [this tutorial](https://www.plumed-tutorials.org/lessons/21/006/data/DIMENSIONALITY.html)
126 :
127 : */
128 : //+ENDPLUMEDOC
129 :
130 : //+PLUMEDOC LANDMARKS LANDMARK_SELECT_FPS
131 : /*
132 : Select a set of landmarks from a large set of configurations using farthest point sampling.
133 :
134 : If you have collected a set of trajectory frames using [COLLECT_FRAMES](COLLECT_FRAMES.md) you can use this action to
135 : select a subset of the configurations you have collected. This shortcut does this using [FARTHEST_POINT_SAMPLING](FARTHEST_POINT_SAMPLING.md).
136 : The first point is selected at random. The remaining points are then selected by taking the unselected point in the input data set that is the furthest
137 : from all the points that have been selected thus far. The following input demonstrates how you can use this method:
138 :
139 : ```plumed
140 : # This stores the positions of all the first 10 atoms in the system for later analysis
141 : cc: COLLECT_FRAMES ATOMS=1,2,3,4,5,6,7,8,9,10 ALIGN=OPTIMAL STRIDE=1 CLEAR=1000
142 :
143 : # Select landmarks
144 : ll: LANDMARK_SELECT_FPS ARG=cc NLANDMARKS=100
145 :
146 : # Output the data to a file
147 : DUMPPDB ATOMS=ll_data ATOM_INDICES=1,2,3,4,5,6,7,8,9,10 FILE=traj.pdb STRIDE=1000
148 : ```
149 :
150 : If you expand the shortcuts in the input above you will notice that the LANDMARK_SELECT_FPS shortcut creates a [DISSIMILARITIES](DISSIMILARITIES.md) action
151 : that calculates the distances between the input frames. We have to compute these dissimilarities in order to perform the farthest point sampling here so you cannot use the
152 : NODISSIMILARITIES flag with this action. However, we also need the dissimilarities to compute the weights of the landmarks as this is done by performing a [VORONOI](VORONOI.md) analysis.
153 : If you would like to turn off the computation of the VORONOI weights you can use the NOVORONOI flag.
154 :
155 : If you have already computed the dissimilarities between the collected frames you can pass them as input to the LANDMARK_SELECT_FPS shortcut as shown below:
156 :
157 : ```plumed
158 : # This stores the positions of all the first 10 atoms in the system for later analysis
159 : cc: COLLECT_FRAMES ATOMS=1,2,3,4,5,6,7,8,9,10 ALIGN=OPTIMAL STRIDE=1 CLEAR=1000
160 :
161 : # This calculates the dissimilarities between the stored frames
162 : cc_dataT: TRANSPOSE ARG=cc_data
163 : dd: DISSIMILARITIES ARG=cc_data,cc_dataT
164 :
165 : # Select landmarks
166 : ll: LANDMARK_SELECT_FPS ARG=cc DISSIMILARITIES=dd NLANDMARKS=100
167 :
168 : # Output the data to a file
169 : DUMPPDB ATOMS=ll_data ATOM_INDICES=1,2,3,4,5,6,7,8,9,10 FILE=traj.pdb STRIDE=1000
170 : ```
171 :
172 : Notice that you can also read in dissimilarities from a file using a [CONSTANT](CONSTANT.md) action and pass these directly to the LANDMARK_SELECT_FPS shortcut and avoid using COLLECT_FRAMES.
173 :
174 : You can learn how to use landmark selection for dimensionality reduction calculations by working through [this tutorial](https://www.plumed-tutorials.org/lessons/21/006/data/DIMENSIONALITY.html)
175 :
176 : */
177 : //+ENDPLUMEDOC
178 :
179 : namespace PLMD {
180 : namespace landmarks {
181 :
182 : class LandmarkSelection : public ActionShortcut {
183 : public:
184 : static void registerKeywords( Keywords& keys );
185 : explicit LandmarkSelection( const ActionOptions& ao );
186 : };
187 :
188 : PLUMED_REGISTER_ACTION(LandmarkSelection,"LANDMARK_SELECT_STRIDE")
189 : PLUMED_REGISTER_ACTION(LandmarkSelection,"LANDMARK_SELECT_RANDOM")
190 : PLUMED_REGISTER_ACTION(LandmarkSelection,"LANDMARK_SELECT_FPS")
191 :
192 17 : void LandmarkSelection::registerKeywords( Keywords& keys ) {
193 17 : ActionShortcut::registerKeywords( keys );
194 17 : keys.add("optional","ARG","the COLLECT_FRAMES action that you used to get the data");
195 17 : keys.add("optional","DISSIMILARITIES","the matrix of dissimilarities if this is not provided the squared dissimilarities are calculated");
196 17 : keys.add("compulsory","NLANDMARKS","the numbe rof landmarks you would like to create");
197 17 : keys.add("optional","SEED","a random number seed");
198 17 : keys.addFlag("NOVORONOI",false,"do not do a Voronoi analysis of the data to determine weights of final points");
199 17 : keys.addFlag("NODISSIMILARITIES",false,"do not calculate the dissimilarities");
200 34 : keys.addOutputComponent("data","ARG","matrix","the data that is being collected by this action");
201 34 : keys.addOutputComponent("logweights","ARG","vector","the logarithms of the weights of the data points");
202 34 : keys.addOutputComponent("rectdissims","DISSIMILARITIES","matrix","a rectangular matrix containing the distances between the landmark points and the rest of the points");
203 34 : keys.addOutputComponent("sqrdissims","DISSIMILARITIES","matrix","a square matrix containing the distances between each pair of landmark points");
204 17 : keys.needsAction("LOGSUMEXP");
205 17 : keys.needsAction("TRANSPOSE");
206 17 : keys.needsAction("DISSIMILARITIES");
207 17 : keys.needsAction("ONES");
208 17 : keys.needsAction("CREATE_MASK");
209 17 : keys.needsAction("FARTHEST_POINT_SAMPLING");
210 17 : keys.needsAction("SELECT_WITH_MASK");
211 17 : keys.needsAction("COMBINE");
212 17 : keys.needsAction("VORONOI");
213 17 : keys.needsAction("MATRIX_PRODUCT");
214 17 : keys.needsAction("CUSTOM");
215 17 : }
216 :
217 8 : LandmarkSelection::LandmarkSelection( const ActionOptions& ao ):
218 : Action(ao),
219 8 : ActionShortcut(ao) {
220 : std::string nlandmarks;
221 8 : parse("NLANDMARKS",nlandmarks);
222 : bool novoronoi;
223 8 : parseFlag("NOVORONOI",novoronoi);
224 :
225 : bool nodissims;
226 16 : parseFlag("NODISSIMILARITIES",nodissims);
227 : std::string argn, dissims;
228 8 : parse("ARG",argn);
229 16 : parse("DISSIMILARITIES",dissims);
230 8 : if( argn.length()>0 ) {
231 7 : ActionShortcut* as = plumed.getActionSet().getShortcutActionWithLabel( argn );
232 7 : if( !as || as->getName()!="COLLECT_FRAMES" ) {
233 0 : error("found no COLLECT_FRAMES action with label " + argn );
234 : }
235 : // Get the weights
236 14 : readInputLine( getShortcutLabel() + "_allweights: LOGSUMEXP ARG=" + argn + "_logweights");
237 : }
238 8 : if( dissims.length()>0 ) {
239 4 : ActionWithValue* ds = plumed.getActionSet().selectWithLabel<ActionWithValue*>( dissims );
240 4 : if( (ds->copyOutput(0))->getRank()!=2 ) {
241 0 : error("input for dissimilarities shoudl be a matrix");
242 : }
243 : // Calculate the dissimilarities if the user didn't specify them
244 4 : } else if( !nodissims ) {
245 2 : readInputLine( getShortcutLabel() + "_" + argn + "_dataT: TRANSPOSE ARG=" + argn + "_data");
246 1 : dissims = getShortcutLabel() + "_dissims";
247 2 : readInputLine( getShortcutLabel() + "_dissims: DISSIMILARITIES SQUARED ARG=" + argn + "_data," + getShortcutLabel() + "_" + argn + "_dataT");
248 : }
249 : // This deals with a corner case whereby users have a matrix of dissimilarities but no corresponding coordinates for these frames
250 8 : if( argn.length()==0 && dissims.size()>0 ) {
251 1 : ActionWithValue* ds = plumed.getActionSet().selectWithLabel<ActionWithValue*>( dissims );
252 1 : if( ds->getName()!="CONSTANT" || (ds->copyOutput(0))->getRank()!=2 ) {
253 0 : error("set ARG as well as DISSIMILARITIES");
254 : }
255 : std::string size;
256 1 : Tools::convert( (ds->copyOutput(0))->getShape()[0], size );
257 2 : readInputLine( getShortcutLabel() + "_allweights: ONES SIZE=" + size );
258 : }
259 :
260 8 : if( getName()=="LANDMARK_SELECT_STRIDE" ) {
261 12 : readInputLine( getShortcutLabel() + "_mask: CREATE_MASK ARG=" + getShortcutLabel() + "_allweights TYPE=stride NZEROS=" + nlandmarks );
262 2 : } else if( getName()=="LANDMARK_SELECT_RANDOM" ) {
263 1 : if( argn.length()==0 ) {
264 0 : error("must set COLLECT_FRAMES object for landmark selection using ARG keyword");
265 : }
266 : std::string seed;
267 2 : parse("SEED",seed);
268 1 : if( seed.length()>0 ) {
269 2 : seed = " SEED=" + seed;
270 : }
271 2 : readInputLine( getShortcutLabel() + "_mask: CREATE_MASK ARG=" + getShortcutLabel() + "_allweights TYPE=random NZEROS=" + nlandmarks + seed );
272 1 : } else if( getName()=="LANDMARK_SELECT_FPS" ) {
273 1 : if( dissims.length()==0 ) {
274 0 : error("dissimiarities must be defined to use FPS sampling");
275 : }
276 : std::string seed;
277 2 : parse("SEED",seed);
278 1 : if( seed.length()>0 ) {
279 0 : seed = " SEED=" + seed;
280 : }
281 2 : readInputLine( getShortcutLabel() + "_mask: FARTHEST_POINT_SAMPLING ARG=" + dissims + " NZEROS=" + nlandmarks + seed );
282 : }
283 :
284 8 : if( argn.length()>0 ) {
285 14 : readInputLine( getShortcutLabel() + "_data: SELECT_WITH_MASK ARG=" + argn + "_data ROW_MASK=" + getShortcutLabel() + "_mask");
286 : }
287 :
288 : unsigned nland;
289 8 : Tools::convert( nlandmarks, nland );
290 8 : if( dissims.length()>0 ) {
291 5 : ActionWithValue* ds = plumed.getActionSet().selectWithLabel<ActionWithValue*>( dissims );
292 5 : if( (ds->copyOutput(0))->getShape()[0]==nland ) {
293 1 : if( !novoronoi ) {
294 0 : warning("cannot use voronoi procedure to give weights as not all distances between points are known");
295 0 : novoronoi=true;
296 : }
297 2 : readInputLine( getShortcutLabel() + "_sqrdissims: COMBINE ARG=" + dissims + " PERIODIC=NO");
298 : } else {
299 8 : readInputLine( getShortcutLabel() + "_rmask: CREATE_MASK ARG=" + getShortcutLabel() + "_allweights TYPE=nomask");
300 8 : readInputLine( getShortcutLabel() + "_rectdissims: SELECT_WITH_MASK ARG=" + dissims + " COLUMN_MASK=" + getShortcutLabel() + "_mask ROW_MASK=" + getShortcutLabel() + "_rmask");
301 8 : readInputLine( getShortcutLabel() + "_sqrdissims: SELECT_WITH_MASK ARG=" + dissims + " ROW_MASK=" + getShortcutLabel() + "_mask COLUMN_MASK=" + getShortcutLabel() + "_mask");
302 : }
303 : }
304 :
305 8 : if( !novoronoi && argn.length()>0 && dissims.length()>0 ) {
306 6 : readInputLine( getShortcutLabel() + "_voronoi: VORONOI ARG=" + getShortcutLabel() + "_rectdissims");
307 6 : readInputLine( getShortcutLabel() + "_allweightsT: TRANSPOSE ARG=" + getShortcutLabel() + "_allweights");
308 6 : readInputLine( getShortcutLabel() + "_weightsT: MATRIX_PRODUCT ARG=" + getShortcutLabel() + "_allweightsT," + getShortcutLabel() + "_voronoi");
309 6 : readInputLine( getShortcutLabel() + "_weights: TRANSPOSE ARG=" + getShortcutLabel() + "_weightsT");
310 6 : readInputLine( getShortcutLabel() + "_logweights: CUSTOM ARG=" + getShortcutLabel() + "_weights FUNC=log(x) PERIODIC=NO");
311 5 : } else if( argn.length()>0 ) {
312 4 : if( !novoronoi ) {
313 0 : warning("cannot use voronoi procedure to give weights to landmark points as DISSIMILARITIES was not set");
314 : }
315 8 : readInputLine( getShortcutLabel() + "_logweights: SELECT_WITH_MASK ARG=" + argn + "_logweights MASK=" + getShortcutLabel() + "_mask");
316 : }
317 : // Create the vector of ones that is needed by Classical MDS
318 8 : if( argn.length()>0 ) {
319 14 : readInputLine( getShortcutLabel() + "_ones: SELECT_WITH_MASK ARG=" + argn + "_ones MASK=" + getShortcutLabel() + "_mask");
320 : }
321 8 : }
322 :
323 : }
324 : }
|