Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2024 by Glen Hocky, New York University on behalf of authors
3 :
4 : The sizeshape module is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU Lesser General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : The sizeshape module is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU Lesser General Public License for more details.
13 :
14 : You should have received a copy of the GNU Lesser General Public License
15 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
16 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
17 : #include "colvar/Colvar.h"
18 : #include "core/ActionRegister.h"
19 : #include "tools/Pbc.h"
20 : #include "tools/File.h" // Input and output from files
21 : #include "tools/Matrix.h" // Linear Algebra operations
22 : #include <sstream>
23 : #include <cmath>
24 :
25 : namespace PLMD {
26 : namespace sizeshape {
27 :
28 : //+PLUMEDOC sizeshapeMOD_COLVAR SIZESHAPE_POSITION_MAHA_DIST
29 : /*
30 : Calculates Mahalanobis distance of a current configuration from a given reference configurational distribution in size-and-shape space.
31 :
32 : The Mahalanobis distance is given as:
33 :
34 : \f[
35 : d(\mathbf{x}, \mathbf{\mu}, \mathbf{\Sigma}) = \sqrt{(\mathbf{x}-\mathbf{\mu})^T \mathbf{\Sigma}^{-1} (\mathbf{x}-\mathbf{\mu})}
36 : \f]
37 :
38 : Here \f$\mathbf{x}\f$ is the configuration at time t, \f$\mathbf{\mu}\f$ is the reference and \f$\mathbf{\Sigma}^{-1}\f$ is the \f$N \times N\f$ precision matrix.
39 :
40 : Size-and-shape Gaussian Mixture Model (shapeGMM) \cite Heidi-shapeGMM-2022 is a probabilistic clustering technique that is used to perform structural clusteing on ensemble of molecular configurations and to obtain reference \f$(\mathbf{\mu})\f$ and precision \f$(\mathbf{\Sigma}^{-1})\f$ corresponding to each of the cluster centers. Please chcek out <a href="https://github.com/mccullaghlab/shapeGMMTorch">shapeGMMTorch-GitHub</a> and <a href="https://pypi.org/project/shapeGMMTorch/"> shapeGMMTorch-PyPI</a> for examples and informations on preforming shapeGMM clustering.
41 :
42 : \par Examples
43 : In the following example, a group is defined with atom indices of selected atoms and then Mahalanobis distance is calculated with respect to the given reference and precision. Each file is a space separated list of 3N floating point numbers.
44 :
45 : \plumedfile
46 : UNITS LENGTH=A TIME=ps ENERGY=kcal/mol
47 : GROUP ATOMS=18,20,22,31,33,35,44,46,48,57,59,61,70,72,74,83,85,87,96,98,100,109,111 LABEL=ga_list
48 : #SETTINGS AUXFILE=regtest/sizeshape/rt-mahadist/global_avg.txt
49 : #SETTINGS AUXFILE=regtest/sizeshape/rt-mahadist/global_precision.txt
50 : d: SIZESHAPE_POSITION_MAHA_DIST REFERENCE=global_avg.txt PRECISION=global_precision.txt GROUP=ga_list
51 : PRINT ARG=d STRIDE=1 FILE=output FMT=%8.8f
52 : \endplumedfile
53 :
54 : */
55 : //+ENDPLUMEDOC
56 :
57 : class position_maha_dist : public Colvar {
58 :
59 : private:
60 : bool pbc, squared;
61 : std::string prec_f_name; // precision file name
62 : std::string ref_f_name; // reference file name
63 : IFile in_; // create an object of class IFile
64 : //Log out_;
65 : Matrix<double> ref_str; // coords of reference
66 : Matrix<double> mobile_str; // coords of mobile
67 : Matrix<double> prec; // precision data
68 : Matrix<double> rotation;
69 : Matrix<double> derv_;
70 : Matrix<double> derv_numeric;
71 : void readinputs(); // reads the input data
72 : double dist;
73 : std::vector<AtomNumber> atom_list; // list of atoms
74 : const double SMALL = 1.0E-30;
75 : const double delta = 0.00001;
76 : public:
77 : static void registerKeywords( Keywords& keys );
78 : explicit position_maha_dist(const ActionOptions&);
79 : double determinant(int n, const std::vector<std::vector<double>>* B);
80 : void kabsch_rot_mat(); // gives rotation matrix
81 : double cal_maha_dist(); // calculates the mahalanobis distance
82 : void grad_maha(double); // calculates the gradient
83 : void numeric_maha(); // calculates the numeric gradient
84 : // active methods:
85 : void calculate() override;
86 : };
87 :
88 : PLUMED_REGISTER_ACTION(position_maha_dist,"SIZESHAPE_POSITION_MAHA_DIST")
89 :
90 3 : void position_maha_dist::registerKeywords( Keywords& keys ) {
91 3 : Colvar::registerKeywords( keys );
92 3 : keys.add("compulsory", "PRECISION", "Precision Matrix (inverse of covariance)" );
93 3 : keys.add("compulsory", "REFERENCE", "Reference structure.");
94 3 : keys.add("atoms","GROUP","The group of atoms being used");
95 3 : keys.addFlag("SQUARED",false,"Returns the square of distance.");
96 6 : keys.setValueDescription("scalar","the Mahalanobis distance between the instantaneous configuration and a given reference distribution in size-and-shape space");
97 3 : }
98 :
99 : // constructor function
100 1 : position_maha_dist::position_maha_dist(const ActionOptions&ao):
101 : PLUMED_COLVAR_INIT(ao),
102 1 : pbc(true),
103 1 : squared(false),
104 1 : dist(0),
105 2 : prec_f_name(""),
106 2 : ref_f_name("") { // Note! no comma here in the last line.
107 1 : parseAtomList("GROUP",atom_list);
108 1 : parse("REFERENCE", ref_f_name);
109 1 : parse("PRECISION", prec_f_name);
110 :
111 1 : bool nopbc=!pbc;
112 1 : parseFlag("NOPBC",nopbc);
113 1 : parseFlag("SQUARED",squared);
114 1 : pbc=!nopbc;
115 :
116 1 : checkRead();
117 :
118 1 : log.printf(" of %u atoms\n",static_cast<unsigned>(atom_list.size()));
119 24 : for(unsigned int i=0; i<atom_list.size(); ++i) {
120 23 : log.printf(" %d", atom_list[i].serial());
121 : }
122 :
123 1 : if(squared) {
124 0 : log.printf("\n chosen to use SQUARED option for SIZESHAPE_POSITION_MAHA_DIST\n");
125 : }
126 :
127 1 : if(pbc) {
128 1 : log.printf("\n using periodic boundary conditions\n");
129 : } else {
130 0 : log.printf("\n without periodic boundary conditions\n");
131 : }
132 :
133 1 : addValueWithDerivatives();
134 1 : setNotPeriodic();
135 :
136 1 : requestAtoms(atom_list);
137 :
138 : // call the readinputs() function here
139 1 : readinputs();
140 :
141 1 : }
142 :
143 : // read inputs function
144 1 : void position_maha_dist::readinputs() {
145 : unsigned N=getNumberOfAtoms();
146 : // read ref coords
147 1 : in_.open(ref_f_name);
148 :
149 : ref_str.resize(N,3);
150 : prec.resize(N,N);
151 :
152 : std::string line_, val_;
153 : unsigned c_=0;
154 :
155 24 : while (c_ < N) {
156 23 : in_.getline(line_);
157 : std::vector<std::string> items_;
158 23 : std::stringstream check_(line_);
159 :
160 92 : while(std::getline(check_, val_, ' ')) {
161 69 : items_.push_back(val_);
162 : }
163 92 : for(int i=0; i<3; ++i) {
164 69 : ref_str(c_,i) = std::stold(items_[i]);
165 : }
166 23 : c_ += 1;
167 23 : }
168 1 : in_.close();
169 :
170 : //read precision
171 1 : in_.open(prec_f_name);
172 :
173 : std::string line, val;
174 : unsigned int c = 0;
175 :
176 24 : while(c < N) {
177 23 : in_.getline(line);
178 :
179 : // vector for storing the objects
180 : std::vector<std::string> items;
181 :
182 : // stringstream helps to treat a string like an ifstream!
183 23 : std::stringstream check(line);
184 :
185 552 : while (std::getline(check, val, ' ')) {
186 529 : items.push_back(val);
187 : }
188 :
189 552 : for(unsigned int i=0; i<N; ++i) {
190 529 : prec(c, i) = std::stold(items[i]);
191 : }
192 :
193 23 : c += 1;
194 :
195 23 : }
196 1 : in_.close();
197 1 : }
198 :
199 :
200 10 : double position_maha_dist::determinant(int n, const std::vector<std::vector<double>>* B) {
201 :
202 10 : std::vector<std::vector<double>> A(n, std::vector<double>(n, 0));
203 : // make a copy first!
204 40 : for(int i=0; i<n; ++i) {
205 120 : for(int j=0; j<n; ++j) {
206 90 : A[i][j] = (*B)[i][j];
207 : }
208 : }
209 :
210 :
211 : // It calculates determinant of a matrix using partial pivoting.
212 :
213 : double det = 1;
214 :
215 : // Row operations for i = 0, ,,,, n - 2 (n-1 not needed)
216 30 : for ( int i = 0; i < n - 1; i++ ) {
217 : // Partial pivot: find row r below with largest element in column i
218 : int r = i;
219 20 : double maxA = std::abs( A[i][i] );
220 50 : for ( int k = i + 1; k < n; k++ ) {
221 30 : double val = std::abs( A[k][i] );
222 30 : if ( val > maxA ) {
223 : r = k;
224 : maxA = val;
225 : }
226 : }
227 20 : if ( r != i ) {
228 70 : for ( int j = i; j < n; j++ ) {
229 50 : std::swap( A[i][j], A[r][j] );
230 : }
231 20 : det = -det;
232 : }
233 :
234 : // Row operations to make upper-triangular
235 20 : double pivot = A[i][i];
236 20 : if (std::abs( pivot ) < SMALL ) {
237 : return 0.0; // Singular matrix
238 : }
239 :
240 50 : for ( int r = i + 1; r < n; r++ ) { // On lower rows
241 30 : double multiple = A[r][i] / pivot; // Multiple of row i to clear element in ith column
242 110 : for ( int j = i; j < n; j++ ) {
243 80 : A[r][j] -= multiple * A[i][j];
244 : }
245 : }
246 20 : det *= pivot; // Determinant is product of diagonal
247 : }
248 :
249 10 : det *= A[n-1][n-1];
250 :
251 10 : return det;
252 10 : }
253 :
254 : // kabsch rotation
255 5 : void position_maha_dist::kabsch_rot_mat() {
256 :
257 : unsigned N=getNumberOfAtoms();
258 :
259 : Matrix<double> mobile_str_T(3,N);
260 : Matrix<double> prec_dot_ref_str(N,3);
261 : Matrix<double> correlation(3,3);
262 :
263 :
264 5 : transpose(mobile_str, mobile_str_T);
265 5 : mult(prec, ref_str, prec_dot_ref_str);
266 5 : mult(mobile_str_T, prec_dot_ref_str, correlation);
267 :
268 :
269 5 : int rw = correlation.nrows();
270 5 : int cl = correlation.ncols();
271 5 : int sz = rw*cl;
272 :
273 : // SVD part (taking from plu2/src/tools/Matrix.h: pseudoInvert function)
274 :
275 5 : std::vector<double> da(sz);
276 : unsigned k=0;
277 :
278 : // Transfer the matrix to the local array
279 20 : for (int i=0; i<cl; ++i)
280 60 : for (int j=0; j<rw; ++j) {
281 45 : da[k++]=static_cast<double>( correlation(j,i) ); // note! its [j][i] not [i][j]
282 : }
283 :
284 5 : int nsv, info, nrows=rw, ncols=cl;
285 : if(rw>cl) {
286 : nsv=cl;
287 : } else {
288 : nsv=rw;
289 : }
290 :
291 : // Create some containers for stuff from single value decomposition
292 5 : std::vector<double> S(nsv);
293 5 : std::vector<double> U(nrows*nrows);
294 5 : std::vector<double> VT(ncols*ncols);
295 5 : std::vector<int> iwork(8*nsv);
296 :
297 : // This optimizes the size of the work array used in lapack singular value decomposition
298 5 : int lwork=-1;
299 5 : std::vector<double> work(1);
300 5 : plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
301 : //if(info!=0) return info;
302 5 : if(info!=0) {
303 0 : log.printf("info:", info);
304 : }
305 :
306 : // Retrieve correct sizes for work and rellocate
307 5 : lwork=(int) work[0];
308 5 : work.resize(lwork);
309 :
310 : // This does the singular value decomposition
311 5 : plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
312 : //if(info!=0) return info;
313 5 : if(info!=0) {
314 0 : log.printf("info:", info);
315 : }
316 :
317 :
318 : // get U and VT in form of 2D vector (U_, VT_)
319 5 : std::vector<std::vector<double>> U_(nrows, std::vector<double>(nrows,0));
320 5 : std::vector<std::vector<double>> VT_(ncols, std::vector<double>(ncols,0));
321 :
322 : int c=0;
323 :
324 20 : for(int i=0; i<nrows; ++i) {
325 60 : for(int j=0; j<nrows; ++j) {
326 45 : U_[j][i] = U[c];
327 45 : c += 1;
328 : }
329 : }
330 : c = 0; // note! its [j][i] not [i][j]
331 20 : for(int i=0; i<ncols; ++i) {
332 60 : for(int j=0; j<ncols; ++j) {
333 45 : VT_[j][i] = VT[c];
334 45 : c += 1;
335 : }
336 : }
337 : c=0; // note! its [j][i] not [i][j]
338 :
339 :
340 : // calculate determinants
341 5 : double det_u = determinant(nrows, &U_);
342 5 : double det_vt = determinant(ncols, &VT_);
343 :
344 : // check!
345 5 : if (det_u * det_vt < 0.0) {
346 8 : for(int i=0; i<nrows; ++i) {
347 6 : U_[i][nrows-1] *= -1;
348 : }
349 : }
350 :
351 :
352 : //Matrix<double> rotation(3,3);
353 5 : rotation.resize(3,3);
354 : Matrix<double> u(3,3), vt(3,3);
355 20 : for(int i=0; i<3; ++i) {
356 60 : for(int j=0; j<3; ++j) {
357 45 : u(i,j)=U_[i][j];
358 45 : vt(i,j)=VT_[i][j];
359 : }
360 : }
361 :
362 : // get rotation matrix
363 5 : mult(u, vt, rotation);
364 :
365 10 : }
366 :
367 :
368 : // calculates maha dist
369 5 : double position_maha_dist::cal_maha_dist() {
370 :
371 : unsigned N=getNumberOfAtoms();
372 :
373 : Matrix<double> rotated_obj(N,3);
374 : // rotate the object
375 5 : mult(mobile_str, rotation, rotated_obj);
376 :
377 : // compute the displacement
378 : Matrix<double> disp(N,3);
379 120 : for(unsigned int i=0; i<N; ++i) {
380 460 : for(unsigned int j=0; j<3; ++j) {
381 345 : disp(i,j) = (rotated_obj(i,j)-ref_str(i,j));
382 : }
383 : }
384 :
385 : Matrix<double> prec_dot_disp(N,3);
386 : Matrix<double> disp_T(3,N);
387 : Matrix<double> out(3,3);
388 :
389 5 : mult(prec, disp, prec_dot_disp);
390 5 : transpose(disp, disp_T);
391 5 : mult(disp_T, prec_dot_disp, out);
392 :
393 :
394 :
395 : double maha_d=0.0;
396 20 : for(int i=0; i<3; ++i) {
397 15 : maha_d += out(i,i);
398 : }
399 :
400 5 : if (!squared) {
401 5 : maha_d = std::sqrt(maha_d);
402 : }
403 :
404 5 : return maha_d;
405 : }
406 :
407 : // gradient function
408 5 : void position_maha_dist::grad_maha(double d) {
409 :
410 : unsigned N=getNumberOfAtoms();
411 :
412 5 : derv_.resize(N,3);
413 :
414 : Matrix<double> ref_str_rot_T(N,3);
415 : Matrix<double> rot_T(3,3);
416 : Matrix<double> diff_(N,3);
417 :
418 5 : transpose(rotation, rot_T);
419 5 : mult(ref_str, rot_T, ref_str_rot_T);
420 :
421 120 : for(unsigned i=0; i<N; ++i) {
422 460 : for(unsigned j=0; j<3; ++j) {
423 345 : diff_(i,j) = mobile_str(i,j) - ref_str_rot_T(i,j);
424 : }
425 : }
426 :
427 5 : mult(prec, diff_, derv_);
428 :
429 : //for(unsigned i=0; i<N; ++i){ for(unsigned j=0; j<3; ++j) {derv_(i,j) /= (N*d);} } // dividing by N here!
430 120 : for(unsigned i=0; i<N; ++i) {
431 460 : for(unsigned j=0; j<3; ++j) {
432 345 : if (!squared) {
433 345 : derv_(i,j) /= d;
434 : } else {
435 0 : derv_(i,j) *= 2.0;
436 : }
437 : }
438 : }
439 :
440 :
441 5 : }
442 :
443 :
444 : // numeric gradient
445 0 : void position_maha_dist::numeric_maha() {
446 : // This function performs numerical derivative.
447 : unsigned N=getNumberOfAtoms();
448 : derv_numeric.resize(N,3);
449 :
450 0 : for(unsigned int atom=0; atom<N; ++atom) {
451 0 : for(unsigned int j=0; j<3; ++j) {
452 0 : mobile_str(atom,j) += delta;
453 0 : kabsch_rot_mat();
454 0 : derv_numeric(atom,j) = (cal_maha_dist() - dist)/delta;
455 0 : mobile_str(atom,j) -= delta;
456 : }
457 : }
458 :
459 0 : }
460 :
461 :
462 : // calculator
463 5 : void position_maha_dist::calculate() {
464 :
465 5 : if(pbc) {
466 5 : makeWhole();
467 : }
468 : unsigned N=getNumberOfAtoms();
469 :
470 : mobile_str.resize(N,3);
471 :
472 : // load the mobile str
473 120 : for(unsigned int i=0; i<N; ++i) {
474 115 : Vector pos=getPosition(i); // const PLMD::Vector
475 460 : for(unsigned j=0; j<3; ++j) {
476 345 : mobile_str(i,j) = pos[j];
477 : }
478 : }
479 :
480 : // translating the structure to center of geometry
481 5 : double center_of_geometry[3]= {0.0, 0.0, 0.0};
482 :
483 120 : for(unsigned int i=0; i<N; ++i) {
484 115 : center_of_geometry[0] += mobile_str(i,0);
485 115 : center_of_geometry[1] += mobile_str(i,1);
486 115 : center_of_geometry[2] += mobile_str(i,2);
487 : }
488 :
489 120 : for(unsigned int i=0; i<N; ++i) {
490 460 : for(unsigned int j=0; j<3; ++j) {
491 345 : mobile_str(i,j) -= (center_of_geometry[j]/N);
492 : }
493 : }
494 :
495 5 : kabsch_rot_mat();
496 5 : dist = cal_maha_dist();
497 :
498 5 : grad_maha(dist);
499 : // set derivatives
500 120 : for(unsigned i=0; i<N; ++i) {
501 115 : Vector vi(derv_(i,0), derv_(i,1), derv_(i,2) );
502 115 : setAtomsDerivatives(i, vi);
503 : }
504 5 : setBoxDerivativesNoPbc();
505 5 : setValue(dist);
506 :
507 5 : }
508 :
509 : }
510 : }
511 :
512 :
513 :
|