Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 : Copyright (c) 2015-2023 The plumed team 3 : (see the PEOPLE file at the root of the distribution for a list of names) 4 : 5 : See http://www.plumed.org for more information. 6 : 7 : This file is part of plumed, version 2. 8 : 9 : plumed is free software: you can redistribute it and/or modify 10 : it under the terms of the GNU Lesser General Public License as published by 11 : the Free Software Foundation, either version 3 of the License, or 12 : (at your option) any later version. 13 : 14 : plumed is distributed in the hope that it will be useful, 15 : but WITHOUT ANY WARRANTY; without even the implied warranty of 16 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 : GNU Lesser General Public License for more details. 18 : 19 : You should have received a copy of the GNU Lesser General Public License 20 : along with plumed. If not, see <http://www.gnu.org/licenses/>. 21 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ 22 : #include "matrixtools/MatrixOperationBase.h" 23 : #include "core/ActionRegister.h" 24 : #include "tools/Random.h" 25 : 26 : //+PLUMEDOC LANDMARKS FARTHEST_POINT_SAMPLING 27 : /* 28 : Select a set of landmarks using farthest point sampling. 29 : 30 : This action is used within the [LANDMARK_SELECT_FPS](LANDMARK_SELECT_FPS.md) shortcut, which performs farthest point sampling. 31 : Farthest point sampling is a method of selecting a subset of input coordinates that works by selecting a first point at random. 32 : The remaining points are then selected by taking the unselected point in the input data set that is the furthest 33 : from all the points that have been selected thus far. 34 : 35 : This particular action is designed to be used in conjunction with [SELECT_WITH_MASK](SELECT_WITH_MASK.md) in the same way that 36 : [CREATE_MASK](CREATE_MASK.md) is designed to be be used with that action. This action takes an $N\times N$ matrix of dissimilarities 37 : in input and outputs and $N$ dimensional vector whose elements are ones and zeros. As shown in the example input below, this output 38 : vector is passed to a [SELECT_WITH_MASK](SELECT_WITH_MASK.md) using the MASK keyword. 39 : 40 : The example below thus shows how this action is used in the [LANDMARK_SELECT_FPS](LANDMARK_SELECT_FPS.md) shortcut to select 100 landmark 41 : frames from the trajectory using farthest point sampling. 42 : 43 : ```plumed 44 : # This stores the positions of all the first 10 atoms in the system for later analysis 45 : cc: COLLECT_FRAMES ATOMS=1,2,3,4,5,6,7,8,9,10 ALIGN=OPTIMAL 46 : 47 : # We now compute the dissimilarities between these frames 48 : cc_dataT: TRANSPOSE ARG=cc_data 49 : dd: DISSIMILARITIES ARG=cc_data,cc_dataT 50 : 51 : # Create a mask that will be used to create our landmark data 52 : mask: FARTHEST_POINT_SAMPLING ARG=dd NZEROS=100 53 : 54 : # These are the landmark points 55 : landmarks: SELECT_WITH_MASK ARG=cc_data ROW_MASK=mask 56 : 57 : # Output the landmarks to a file 58 : DUMPPDB ATOMS=landmarks ATOM_INDICES=1,2,3,4,5,6,7,8,9,10 FILE=traj.pdb 59 : ``` 60 : 61 : This only saves the coordinates of the landmark points. If you look at the shortcuts in the documentation for [LANDMARK_SELECT_FPS](LANDMARK_SELECT_FPS.md) 62 : you can see how the shortcut also saves information on the dissimilarities between the landmarks, the dissimilarities between the landmarks and all the other 63 : points and the weights of the landmarks, which are determined using a [VORONOI](VORONOI.md) analysis. 64 : 65 : */ 66 : //+ENDPLUMEDOC 67 : 68 : namespace PLMD { 69 : namespace landmarks { 70 : 71 : class FarthestPointSampling : public matrixtools::MatrixOperationBase { 72 : private: 73 : unsigned seed; 74 : unsigned nlandmarks; 75 : public: 76 : static void registerKeywords( Keywords& keys ); 77 : explicit FarthestPointSampling( const ActionOptions& ao ); 78 0 : unsigned getNumberOfDerivatives() override { 79 0 : return 0; 80 : } 81 : void prepare() override ; 82 : void calculate() override ; 83 0 : void apply() override {} 84 0 : double getForceOnMatrixElement( const unsigned& jrow, const unsigned& krow ) const { 85 0 : plumed_merror("this should not be called"); 86 : } 87 : }; 88 : 89 : PLUMED_REGISTER_ACTION(FarthestPointSampling,"FARTHEST_POINT_SAMPLING") 90 : 91 4 : void FarthestPointSampling::registerKeywords( Keywords& keys ) { 92 4 : matrixtools::MatrixOperationBase::registerKeywords( keys ); 93 4 : keys.add("compulsory","NZEROS","the number of landmark points that you want to select"); 94 4 : keys.add("compulsory","SEED","1234","a random number seed"); 95 8 : keys.setValueDescription("vector","a vector which has as many elements as there are rows in the input matrix of dissimilarities. NZEROS of the elements in this vector are equal to one, the rest of the elements are equal to zero. The nodes that have elements equal to one are the NZEROS points that are farthest appart according to the input dissimilarities"); 96 4 : } 97 : 98 1 : FarthestPointSampling::FarthestPointSampling( const ActionOptions& ao ): 99 : Action(ao), 100 1 : MatrixOperationBase(ao) { 101 1 : if( getPntrToArgument(0)->getShape()[0]!=getPntrToArgument(0)->getShape()[1] ) { 102 0 : error("input to this argument should be a square matrix of dissimilarities"); 103 : } 104 1 : parse("NZEROS",nlandmarks); 105 1 : parse("SEED",seed); 106 1 : log.printf(" selecting %d landmark points \n", nlandmarks ); 107 : 108 1 : std::vector<unsigned> shape(1); 109 1 : shape[0] = getPntrToArgument(0)->getShape()[0]; 110 1 : addValue( shape ); 111 1 : setNotPeriodic(); 112 1 : getPntrToComponent(0)->buildDataStore(); 113 1 : } 114 : 115 1 : void FarthestPointSampling::prepare() { 116 1 : Value* myval = getPntrToComponent(0); 117 1 : if( myval->getShape()[0]!=getPntrToArgument(0)->getShape()[0] ) { 118 1 : std::vector<unsigned> shape(1); 119 1 : shape[0] = getPntrToArgument(0)->getShape()[0]; 120 1 : myval->setShape(shape); 121 : } 122 4 : for(unsigned i=0; i<nlandmarks; ++i) { 123 3 : myval->set( i, 0.0 ); 124 : } 125 11 : for(unsigned i=nlandmarks; i<myval->getShape()[0]; ++i) { 126 10 : myval->set( i, 1.0 ); 127 : } 128 1 : } 129 : 130 1 : void FarthestPointSampling::calculate() { 131 1 : Value* myval=getPntrToComponent(0); 132 1 : unsigned npoints = getPntrToArgument(0)->getShape()[0]; 133 14 : for(unsigned i=0; i<npoints; ++i) { 134 13 : myval->set( i, 1.0 ); 135 : } 136 1 : std::vector<unsigned> landmarks( nlandmarks ); 137 : 138 : // Select first point at random 139 1 : Random random; 140 1 : random.setSeed(-seed); 141 1 : double rand=random.RandU01(); 142 1 : landmarks[0] = std::floor( npoints*rand ); 143 1 : myval->set( landmarks[0], 0 ); 144 : 145 : // Now find distance to all other points (N.B. We can use squared distances here for speed) 146 1 : Matrix<double> distances( nlandmarks, npoints ); 147 : Value* myarg = getPntrToArgument(0); 148 14 : for(unsigned i=0; i<npoints; ++i) { 149 13 : distances(0,i) = myarg->get( landmarks[0]*npoints + i ); 150 : } 151 : 152 : // Now find all other landmarks 153 3 : for(unsigned i=1; i<nlandmarks; ++i) { 154 : // Find point that has the largest minimum distance from the landmarks selected thus far 155 : double maxd=0; 156 28 : for(unsigned j=0; j<npoints; ++j) { 157 26 : double mind=distances(0,j); 158 39 : for(unsigned k=1; k<i; ++k) { 159 13 : if( distances(k,j)<mind ) { 160 : mind=distances(k,j); 161 : } 162 : } 163 26 : if( mind>maxd ) { 164 : maxd=mind; 165 5 : landmarks[i]=j; 166 : } 167 : } 168 2 : myval->set( landmarks[i], 0 ); 169 28 : for(unsigned k=0; k<npoints; ++k) { 170 26 : distances(i,k) = myarg->get( landmarks[i]*npoints + k ); 171 : } 172 : } 173 1 : } 174 : 175 : } 176 : }