Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2024 by Glen Hocky, New York University on behalf of authors
3 :
4 : The sizeshape module is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU Lesser General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : The sizeshape module is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU Lesser General Public License for more details.
13 :
14 : You should have received a copy of the GNU Lesser General Public License
15 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
16 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
17 : #include "colvar/Colvar.h"
18 : #include "core/ActionRegister.h"
19 : #include "tools/Pbc.h"
20 : #include "tools/File.h" // Input and output from files
21 : #include "tools/Matrix.h" // Linear Algebra operations
22 : #include <sstream>
23 : #include <cmath>
24 :
25 : namespace PLMD {
26 : namespace sizeshape {
27 :
28 : //+PLUMEDOC sizeshapeMOD_COLVAR SIZESHAPE_POSITION_MAHA_DIST
29 : /*
30 : Calculates Mahalanobis distance of a current configuration from a given reference configurational distribution in size-and-shape space.
31 :
32 : The Mahalanobis distance is given as:
33 :
34 : \f[
35 : d(\mathbf{x}, \mathbf{\mu}, \mathbf{\Sigma}) = \sqrt{(\mathbf{x}-\mathbf{\mu})^T \mathbf{\Sigma}^{-1} (\mathbf{x}-\mathbf{\mu})}
36 : \f]
37 :
38 : Here \f$\mathbf{x}\f$ is the configuration at time t, \f$\mathbf{\mu}\f$ is the reference and \f$\mathbf{\Sigma}^{-1}\f$ is the \f$N \times N\f$ precision matrix.
39 :
40 : Size-and-shape Gaussian Mixture Model (shapeGMM) \cite Heidi-shapeGMM-2022 is a probabilistic clustering technique that is used to perform structural clusteing on ensemble of molecular configurations and to obtain reference \f$(\mathbf{\mu})\f$ and precision \f$(\mathbf{\Sigma}^{-1})\f$ corresponding to each of the cluster centers. Please chcek out <a href="https://github.com/mccullaghlab/shapeGMMTorch">shapeGMMTorch-GitHub</a> and <a href="https://pypi.org/project/shapeGMMTorch/"> shapeGMMTorch-PyPI</a> for examples and informations on preforming shapeGMM clustering.
41 :
42 : \par Examples
43 : In the following example, a group is defined with atom indices of selected atoms and then Mahalanobis distance is calculated with respect to the given reference and precision. Each file is a space separated list of 3N floating point numbers.
44 :
45 : \plumedfile
46 : UNITS LENGTH=A TIME=ps ENERGY=kcal/mol
47 : GROUP ATOMS=18,20,22,31,33,35,44,46,48,57,59,61,70,72,74,83,85,87,96,98,100,109,111 LABEL=ga_list
48 : #SETTINGS AUXFILE=regtest/sizeshape/rt-mahadist/global_avg.txt
49 : #SETTINGS AUXFILE=regtest/sizeshape/rt-mahadist/global_precision.txt
50 : d: SIZESHAPE_POSITION_MAHA_DIST REFERENCE=global_avg.txt PRECISION=global_precision.txt GROUP=ga_list
51 : PRINT ARG=d STRIDE=1 FILE=output FMT=%8.8f
52 : \endplumedfile
53 :
54 : */
55 : //+ENDPLUMEDOC
56 :
57 : class position_maha_dist : public Colvar {
58 :
59 : private:
60 : bool pbc, squared;
61 : std::string prec_f_name; // precision file name
62 : std::string ref_f_name; // reference file name
63 : IFile in_; // create an object of class IFile
64 : //Log out_;
65 : Matrix<double> ref_str; // coords of reference
66 : Matrix<double> mobile_str; // coords of mobile
67 : Matrix<double> prec; // precision data
68 : Matrix<double> rotation;
69 : Matrix<double> derv_;
70 : Matrix<double> derv_numeric;
71 : void readinputs(); // reads the input data
72 : double dist;
73 : std::vector<AtomNumber> atom_list; // list of atoms
74 : const double SMALL = 1.0E-30;
75 : const double delta = 0.00001;
76 : public:
77 : static void registerKeywords( Keywords& keys );
78 : explicit position_maha_dist(const ActionOptions&);
79 : double determinant(int n, const std::vector<std::vector<double>>* B);
80 : void kabsch_rot_mat(); // gives rotation matrix
81 : double cal_maha_dist(); // calculates the mahalanobis distance
82 : void grad_maha(double); // calculates the gradient
83 : void numeric_maha(); // calculates the numeric gradient
84 : // active methods:
85 : void calculate() override;
86 : };
87 :
88 : PLUMED_REGISTER_ACTION(position_maha_dist,"SIZESHAPE_POSITION_MAHA_DIST")
89 :
90 3 : void position_maha_dist::registerKeywords( Keywords& keys ) {
91 3 : Colvar::registerKeywords( keys );
92 6 : keys.add("compulsory", "PRECISION", "Precision Matrix (inverse of covariance)" );
93 6 : keys.add("compulsory", "REFERENCE", "Reference structure.");
94 6 : keys.add("atoms","GROUP","The group of atoms being used");
95 6 : keys.addFlag("SQUARED",false,"Returns the square of distance.");
96 3 : keys.setValueDescription("the Mahalanobis distance between the instantaneous configuration and a given reference distribution in size-and-shape space");
97 3 : }
98 :
99 : // constructor function
100 1 : position_maha_dist::position_maha_dist(const ActionOptions&ao):
101 : PLUMED_COLVAR_INIT(ao),
102 1 : pbc(true),
103 1 : squared(false),
104 1 : dist(0),
105 2 : prec_f_name(""),
106 2 : ref_f_name("") // Note! no comma here in the last line.
107 : {
108 1 : parseAtomList("GROUP",atom_list);
109 1 : parse("REFERENCE", ref_f_name);
110 1 : parse("PRECISION", prec_f_name);
111 :
112 1 : bool nopbc=!pbc;
113 1 : parseFlag("NOPBC",nopbc);
114 1 : parseFlag("SQUARED",squared);
115 1 : pbc=!nopbc;
116 :
117 1 : checkRead();
118 :
119 1 : log.printf(" of %u atoms\n",static_cast<unsigned>(atom_list.size()));
120 24 : for(unsigned int i=0; i<atom_list.size(); ++i) {
121 23 : log.printf(" %d", atom_list[i].serial());
122 : }
123 :
124 1 : if(squared)log.printf("\n chosen to use SQUARED option for SIZESHAPE_POSITION_MAHA_DIST\n");
125 :
126 1 : if(pbc) log.printf("\n using periodic boundary conditions\n");
127 0 : else log.printf("\n without periodic boundary conditions\n");
128 :
129 2 : addValueWithDerivatives(); setNotPeriodic();
130 :
131 1 : requestAtoms(atom_list);
132 :
133 : // call the readinputs() function here
134 1 : readinputs();
135 :
136 1 : }
137 :
138 : // read inputs function
139 1 : void position_maha_dist::readinputs()
140 : {
141 : unsigned N=getNumberOfAtoms();
142 : // read ref coords
143 1 : in_.open(ref_f_name);
144 :
145 : ref_str.resize(N,3); prec.resize(N,N);
146 :
147 : std::string line_, val_;
148 : unsigned c_=0;
149 :
150 24 : while (c_ < N)
151 : {
152 23 : in_.getline(line_);
153 : std::vector<std::string> items_;
154 23 : std::stringstream check_(line_);
155 :
156 92 : while(std::getline(check_, val_, ' ')) { items_.push_back(val_); }
157 92 : for(int i=0; i<3; ++i) { ref_str(c_,i) = std::stold(items_[i]); }
158 23 : c_ += 1;
159 23 : }
160 1 : in_.close();
161 :
162 : //read precision
163 1 : in_.open(prec_f_name);
164 :
165 : std::string line, val;
166 : unsigned int c = 0;
167 :
168 24 : while(c < N)
169 : {
170 23 : in_.getline(line);
171 :
172 : // vector for storing the objects
173 : std::vector<std::string> items;
174 :
175 : // stringstream helps to treat a string like an ifstream!
176 23 : std::stringstream check(line);
177 :
178 552 : while (std::getline(check, val, ' '))
179 : {
180 529 : items.push_back(val);
181 : }
182 :
183 552 : for(unsigned int i=0; i<N; ++i)
184 : {
185 529 : prec(c, i) = std::stold(items[i]);
186 : }
187 :
188 23 : c += 1;
189 :
190 23 : }
191 1 : in_.close();
192 1 : }
193 :
194 :
195 10 : double position_maha_dist::determinant(int n, const std::vector<std::vector<double>>* B)
196 : {
197 :
198 10 : std::vector<std::vector<double>> A(n, std::vector<double>(n, 0));
199 : // make a copy first!
200 40 : for(int i=0; i<n; ++i) {
201 120 : for(int j=0; j<n; ++j) {A[i][j] = (*B)[i][j];}
202 : }
203 :
204 :
205 : // It calculates determinant of a matrix using partial pivoting.
206 :
207 : double det = 1;
208 :
209 : // Row operations for i = 0, ,,,, n - 2 (n-1 not needed)
210 30 : for ( int i = 0; i < n - 1; i++ )
211 : {
212 : // Partial pivot: find row r below with largest element in column i
213 : int r = i;
214 20 : double maxA = std::abs( A[i][i] );
215 50 : for ( int k = i + 1; k < n; k++ )
216 : {
217 30 : double val = std::abs( A[k][i] );
218 30 : if ( val > maxA )
219 : {
220 : r = k;
221 : maxA = val;
222 : }
223 : }
224 20 : if ( r != i )
225 : {
226 70 : for ( int j = i; j < n; j++ ) std::swap( A[i][j], A[r][j] );
227 20 : det = -det;
228 : }
229 :
230 : // Row operations to make upper-triangular
231 20 : double pivot = A[i][i];
232 20 : if (std::abs( pivot ) < SMALL ) return 0.0; // Singular matrix
233 :
234 50 : for ( int r = i + 1; r < n; r++ ) // On lower rows
235 : {
236 30 : double multiple = A[r][i] / pivot; // Multiple of row i to clear element in ith column
237 110 : for ( int j = i; j < n; j++ ) A[r][j] -= multiple * A[i][j];
238 : }
239 20 : det *= pivot; // Determinant is product of diagonal
240 : }
241 :
242 10 : det *= A[n-1][n-1];
243 :
244 10 : return det;
245 10 : }
246 :
247 : // kabsch rotation
248 5 : void position_maha_dist::kabsch_rot_mat() {
249 :
250 : unsigned N=getNumberOfAtoms();
251 :
252 : Matrix<double> mobile_str_T(3,N);
253 : Matrix<double> prec_dot_ref_str(N,3);
254 : Matrix<double> correlation(3,3);
255 :
256 :
257 5 : transpose(mobile_str, mobile_str_T);
258 5 : mult(prec, ref_str, prec_dot_ref_str);
259 5 : mult(mobile_str_T, prec_dot_ref_str, correlation);
260 :
261 :
262 5 : int rw = correlation.nrows();
263 5 : int cl = correlation.ncols();
264 5 : int sz = rw*cl;
265 :
266 : // SVD part (taking from plu2/src/tools/Matrix.h: pseudoInvert function)
267 :
268 5 : std::vector<double> da(sz);
269 : unsigned k=0;
270 :
271 : // Transfer the matrix to the local array
272 65 : for (int i=0; i<cl; ++i) for (int j=0; j<rw; ++j) da[k++]=static_cast<double>( correlation(j,i) ); // note! its [j][i] not [i][j]
273 :
274 5 : int nsv, info, nrows=rw, ncols=cl;
275 : if(rw>cl) {nsv=cl;} else {nsv=rw;}
276 :
277 : // Create some containers for stuff from single value decomposition
278 5 : std::vector<double> S(nsv);
279 5 : std::vector<double> U(nrows*nrows);
280 5 : std::vector<double> VT(ncols*ncols);
281 5 : std::vector<int> iwork(8*nsv);
282 :
283 : // This optimizes the size of the work array used in lapack singular value decomposition
284 5 : int lwork=-1;
285 5 : std::vector<double> work(1);
286 5 : plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
287 : //if(info!=0) return info;
288 5 : if(info!=0) log.printf("info:", info);
289 :
290 : // Retrieve correct sizes for work and rellocate
291 5 : lwork=(int) work[0]; work.resize(lwork);
292 :
293 : // This does the singular value decomposition
294 5 : plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
295 : //if(info!=0) return info;
296 5 : if(info!=0) log.printf("info:", info);
297 :
298 :
299 : // get U and VT in form of 2D vector (U_, VT_)
300 5 : std::vector<std::vector<double>> U_(nrows, std::vector<double>(nrows,0));
301 5 : std::vector<std::vector<double>> VT_(ncols, std::vector<double>(ncols,0));
302 :
303 : int c=0;
304 :
305 65 : for(int i=0; i<nrows; ++i) { for(int j=0; j<nrows; ++j) { U_[j][i] = U[c]; c += 1;} } c = 0; // note! its [j][i] not [i][j]
306 65 : for(int i=0; i<ncols; ++i) { for(int j=0; j<ncols; ++j) { VT_[j][i] = VT[c]; c += 1;} } c=0; // note! its [j][i] not [i][j]
307 :
308 :
309 : // calculate determinants
310 5 : double det_u = determinant(nrows, &U_);
311 5 : double det_vt = determinant(ncols, &VT_);
312 :
313 : // check!
314 13 : if (det_u * det_vt < 0.0) { for(int i=0; i<nrows; ++i) {U_[i][nrows-1] *= -1;} }
315 :
316 :
317 : //Matrix<double> rotation(3,3);
318 5 : rotation.resize(3,3);
319 : Matrix<double> u(3,3), vt(3,3);
320 65 : for(int i=0; i<3; ++i) { for(int j=0; j<3; ++j) { u(i,j)=U_[i][j]; vt(i,j)=VT_[i][j]; } }
321 :
322 : // get rotation matrix
323 5 : mult(u, vt, rotation);
324 :
325 10 : }
326 :
327 :
328 : // calculates maha dist
329 5 : double position_maha_dist::cal_maha_dist() {
330 :
331 : unsigned N=getNumberOfAtoms();
332 :
333 : Matrix<double> rotated_obj(N,3);
334 : // rotate the object
335 5 : mult(mobile_str, rotation, rotated_obj);
336 :
337 : // compute the displacement
338 : Matrix<double> disp(N,3);
339 465 : for(unsigned int i=0; i<N; ++i) { for(unsigned int j=0; j<3; ++j) { disp(i,j) = (rotated_obj(i,j)-ref_str(i,j)); } }
340 :
341 : Matrix<double> prec_dot_disp(N,3);
342 : Matrix<double> disp_T(3,N);
343 : Matrix<double> out(3,3);
344 :
345 5 : mult(prec, disp, prec_dot_disp);
346 5 : transpose(disp, disp_T);
347 5 : mult(disp_T, prec_dot_disp, out);
348 :
349 :
350 :
351 : double maha_d=0.0;
352 20 : for(int i=0; i<3; ++i) { maha_d += out(i,i);}
353 :
354 5 : if (!squared) maha_d = std::sqrt(maha_d);
355 :
356 5 : return maha_d;
357 : }
358 :
359 : // gradient function
360 5 : void position_maha_dist::grad_maha(double d) {
361 :
362 : unsigned N=getNumberOfAtoms();
363 :
364 5 : derv_.resize(N,3);
365 :
366 : Matrix<double> ref_str_rot_T(N,3);
367 : Matrix<double> rot_T(3,3);
368 : Matrix<double> diff_(N,3);
369 :
370 5 : transpose(rotation, rot_T);
371 5 : mult(ref_str, rot_T, ref_str_rot_T);
372 :
373 465 : for(unsigned i=0; i<N; ++i) { for(unsigned j=0; j<3; ++j) { diff_(i,j) = mobile_str(i,j) - ref_str_rot_T(i,j); } }
374 :
375 5 : mult(prec, diff_, derv_);
376 :
377 : //for(unsigned i=0; i<N; ++i){ for(unsigned j=0; j<3; ++j) {derv_(i,j) /= (N*d);} } // dividing by N here!
378 465 : for(unsigned i=0; i<N; ++i) { for(unsigned j=0; j<3; ++j) { if (!squared) {derv_(i,j) /= d;} else {derv_(i,j) *= 2.0;}}}
379 :
380 :
381 5 : }
382 :
383 :
384 : // numeric gradient
385 0 : void position_maha_dist::numeric_maha() {
386 : // This function performs numerical derivative.
387 : unsigned N=getNumberOfAtoms();
388 : derv_numeric.resize(N,3);
389 :
390 0 : for(unsigned int atom=0; atom<N; ++atom) {
391 0 : for(unsigned int j=0; j<3; ++j) {
392 0 : mobile_str(atom,j) += delta;
393 0 : kabsch_rot_mat();
394 0 : derv_numeric(atom,j) = (cal_maha_dist() - dist)/delta;
395 0 : mobile_str(atom,j) -= delta;
396 : }
397 : }
398 :
399 0 : }
400 :
401 :
402 : // calculator
403 5 : void position_maha_dist::calculate() {
404 :
405 5 : if(pbc) makeWhole();
406 : unsigned N=getNumberOfAtoms();
407 :
408 : mobile_str.resize(N,3);
409 :
410 : // load the mobile str
411 120 : for(unsigned int i=0; i<N; ++i) {
412 115 : Vector pos=getPosition(i); // const PLMD::Vector
413 460 : for(unsigned j=0; j<3; ++j) {
414 345 : mobile_str(i,j) = pos[j];
415 : }
416 : }
417 :
418 : // translating the structure to center of geometry
419 5 : double center_of_geometry[3]= {0.0, 0.0, 0.0};
420 :
421 120 : for(unsigned int i=0; i<N; ++i)
422 : {
423 115 : center_of_geometry[0] += mobile_str(i,0); center_of_geometry[1] += mobile_str(i,1); center_of_geometry[2] += mobile_str(i,2);
424 : }
425 :
426 120 : for(unsigned int i=0; i<N; ++i)
427 : {
428 460 : for(unsigned int j=0; j<3; ++j) { mobile_str(i,j) -= (center_of_geometry[j]/N); }
429 : }
430 :
431 5 : kabsch_rot_mat();
432 5 : dist = cal_maha_dist();
433 :
434 5 : grad_maha(dist);
435 : // set derivatives
436 120 : for(unsigned i=0; i<N; ++i) {
437 115 : Vector vi(derv_(i,0), derv_(i,1), derv_(i,2) );
438 115 : setAtomsDerivatives(i, vi);
439 : }
440 5 : setBoxDerivativesNoPbc();
441 5 : setValue(dist);
442 :
443 5 : }
444 :
445 : }
446 : }
447 :
448 :
449 :
|