Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2024 by Glen Hocky, New York University on behalf of authors
3 :
4 : The sizeshape module is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU Lesser General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : The sizeshape module is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU Lesser General Public License for more details.
13 :
14 : You should have received a copy of the GNU Lesser General Public License
15 : along with plumed. If not, see <http://www.gnu.org/licenses/>.
16 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
17 : #include "colvar/Colvar.h"
18 : #include "core/ActionRegister.h"
19 : #include "tools/Pbc.h"
20 : #include "tools/File.h" // Input and output from files
21 : #include "tools/Matrix.h" // Linear Algebra operations
22 : #include <sstream>
23 : #include <cmath>
24 : #include "tools/Communicator.h" // All the MPI related stuffs
25 :
26 : namespace PLMD {
27 : namespace sizeshape {
28 :
29 : //+PLUMEDOC sizeshapeMOD_COLVAR SIZESHAPE_POSITION_LINEAR_PROJ
30 : /*
31 : Calculates a linear projection in the space of a given reference configurational distribution in size-and-shape space.
32 :
33 : This method is described in \cite Sasmal-poslda-2023.
34 :
35 : The linear projection is given by:
36 : \f[
37 : l(\mathbf{x}) = \mathbf{v}\cdot(\mathbf{R}\cdot(\mathbf{x}(t) - \vec{{\zeta}}(t)) - \mathbf{\mu}),
38 : \f]
39 : Where \f$\mathbf{v}\f$ is a vector of linear coefficients, \f$\mathbf{x}(t)\f$ is the configuration at time t, \f$\vec{\zeta}(t)\f$ is the difference in the geometric mean of the current configuration and that of the reference configuration \f$\mathbf{\mu}\f$. \f$\vec{\zeta}(t) = \frac{1}{N} \sum_{i=1}^N \vec{x_{i}}(t) - \frac{1}{N} \sum_{i=1}^N \vec{\mu_{i}}(t)\f$, for N atoms.
40 :
41 : \f$\mathbf{R}\f$ is an optimal rotation matrix that minimizes the Mahalanobis distance between the current configuration and reference. \f$\mathbf{R}\f$ is obtained by using Kabsch algorithm within the code. The Mahalanobis distance is given as:
42 :
43 : \f[
44 : d(\mathbf{x}, \mathbf{\mu}, \mathbf{\Sigma}) = \sqrt{(\mathbf{x}-\mathbf{\mu})^T \mathbf{\Sigma}^{-1} (\mathbf{x}-\mathbf{\mu})}
45 : \f]
46 :
47 : where, \f$\mathbf{\Sigma}^{-1}\f$ is the \f$N\times N\f$ precision matrix. See also \ref POSITION_MAHA_DIST for information about calculating Mahalanobis distance in size-and-shape space.
48 :
49 : Size-and-shape Gaussian Mixture Model (shapeGMM) \cite Heidi-shapeGMM-2022 is a probabilistic clustering technique that is used to perform structural clusteing on ensemble of molecular configurations and to obtain reference \f$(\mathbf{\mu})\f$ and precision \f$(\mathbf{\Sigma}^{-1})\f$ corresponding to each of the cluster centers. Please chcek out <a href="https://github.com/mccullaghlab/shapeGMMTorch">shapeGMMTorch-GitHub</a> and <a href="https://pypi.org/project/shapeGMMTorch/"> shapeGMMTorch-PyPI</a> for examples and informations on preforming shapeGMM clustering.
50 :
51 : \par Examples
52 : In the following example, a group is defined with atom indices of selected atoms and then linear projection is calculated for the given reference, precision and coefficients. Each file is a space separated list of 3N floating point numbers.
53 :
54 : \plumedfile
55 : UNITS LENGTH=A TIME=ps ENERGY=kcal/mol
56 : GROUP ATOMS=18,20,22,31,33,35,44,46,48,57,59,61,70,72,74,83,85,87,96,98,100,109,111 LABEL=ga_list
57 : #SETTINGS AUXFILE=regtest/sizeshape/rt-sizeshape/global_avg.txt
58 : #SETTINGS AUXFILE=regtest/sizeshape/rt-sizeshape/global_precision.txt
59 : #SETTINGS AUXFILE=regtest/sizeshape/rt-sizeshape/ld1_scalings.txt
60 : proj: SIZESHAPE_POSITION_LINEAR_PROJ REFERENCE=global_avg.txt PRECISION=global_precision.txt COEFFS=ld1_scalings.txt GROUP=ga_list
61 : PRINT ARG=proj STRIDE=1 FILE=COLVAR FMT=%8.8f
62 : \endplumedfile
63 :
64 : */
65 : //+ENDPLUMEDOC
66 :
67 :
68 : class position_linear_proj : public Colvar {
69 :
70 : private:
71 : bool pbc, serial;
72 : std::string prec_f_name; // precision file name
73 : std::string ref_f_name; // reference file name
74 : std::string coeffs_f_name; // file containing linear coeffs
75 : IFile in_; // create an object of class IFile
76 : Log out_;
77 : Matrix<double> ref_str; // coords of reference
78 : Matrix<double> mobile_str; // coords of mobile
79 : Matrix<double> prec; // precision data
80 : Matrix<double> rotation;
81 : std::vector<double> linear_coeffs; // Linear Coefficients
82 : Matrix<double> derv_numeric;
83 : void readinputs(); // reads the input data
84 : double proj; // projection value
85 : std::vector<AtomNumber> atom_list; // list of atoms
86 : const double SMALL = 1.0E-30;
87 : const double delta = 0.00001;
88 : public:
89 : static void registerKeywords( Keywords& keys );
90 : explicit position_linear_proj(const ActionOptions&);
91 : double determinant(int n, const std::vector<std::vector<double>>* B);
92 : void kabsch_rot_mat(); // gives rotation matrix
93 : double cal_position_linear_proj(); // calculates the linear projection
94 : void numeric_grad(); // calculates the numeric gradient
95 : // active methods:
96 : void calculate() override;
97 : };
98 :
99 : PLUMED_REGISTER_ACTION(position_linear_proj, "SIZESHAPE_POSITION_LINEAR_PROJ")
100 :
101 : // register keywords function
102 7 : void position_linear_proj::registerKeywords( Keywords& keys ) {
103 7 : Colvar::registerKeywords( keys );
104 14 : keys.add("compulsory", "PRECISION", "Precision Matrix (inverse of covariance)." );
105 14 : keys.add("compulsory", "REFERENCE", "Coordinates of the reference structure.");
106 14 : keys.add("atoms","GROUP","Group of atoms being used");
107 14 : keys.add("compulsory", "COEFFS", "Vector of linear coefficients.");
108 14 : keys.addFlag("SERIAL",false,"Perform the calculation in serial, for debug purposes only.");
109 7 : keys.setValueDescription("the linear projection");
110 7 : }
111 :
112 : // constructor function
113 5 : position_linear_proj::position_linear_proj(const ActionOptions&ao):
114 : PLUMED_COLVAR_INIT(ao),
115 5 : pbc(true),
116 5 : serial(false),
117 5 : proj(0),
118 10 : prec_f_name(""),
119 5 : ref_f_name(""),
120 15 : coeffs_f_name("") // Note! no comma here in the last line.
121 : {
122 5 : parseFlag("SERIAL",serial);
123 5 : parseAtomList("GROUP",atom_list);
124 5 : parse("REFERENCE", ref_f_name);
125 5 : parse("PRECISION", prec_f_name);
126 5 : parse("COEFFS", coeffs_f_name);
127 5 : bool nopbc=!pbc;
128 5 : parseFlag("NOPBC",nopbc);
129 5 : pbc=!nopbc;
130 :
131 5 : checkRead();
132 :
133 5 : log.printf(" of %u atoms\n",static_cast<unsigned>(atom_list.size()));
134 120 : for(unsigned int i=0; i<atom_list.size(); ++i) {
135 115 : log.printf(" %d", atom_list[i].serial());
136 : }
137 :
138 5 : if(pbc) log.printf("\n using periodic boundary conditions\n");
139 0 : else log.printf("\n without periodic boundary conditions\n");
140 :
141 10 : addValueWithDerivatives(); setNotPeriodic();
142 :
143 5 : requestAtoms(atom_list);
144 :
145 : // call the readinputs() function here
146 5 : readinputs();
147 :
148 5 : }
149 :
150 : // read inputs function
151 5 : void position_linear_proj::readinputs()
152 : {
153 : unsigned N=getNumberOfAtoms();
154 : // read ref coords
155 5 : in_.open(ref_f_name);
156 :
157 : ref_str.resize(N,3); prec.resize(N,N);
158 : derv_numeric.resize(N,3);
159 :
160 : std::string line_, val_;
161 : unsigned c_=0;
162 :
163 120 : while (c_ < N)
164 : {
165 115 : in_.getline(line_);
166 : std::vector<std::string> items_;
167 115 : std::stringstream check_(line_);
168 :
169 460 : while(std::getline(check_, val_, ' ')) { items_.push_back(val_); }
170 460 : for(int i=0; i<3; ++i) { ref_str(c_,i) = std::stold(items_[i]); }
171 115 : c_ += 1;
172 115 : }
173 5 : in_.close();
174 :
175 : //read precision
176 5 : in_.open(prec_f_name);
177 :
178 : std::string line, val;
179 : unsigned int c = 0;
180 :
181 120 : while(c < N)
182 : {
183 115 : in_.getline(line);
184 :
185 : // vector for storing the objects
186 : std::vector<std::string> items;
187 :
188 : // stringstream helps to treat a string like an ifstream!
189 115 : std::stringstream check(line);
190 :
191 2760 : while (std::getline(check, val, ' '))
192 : {
193 2645 : items.push_back(val);
194 : }
195 :
196 2760 : for(unsigned int i=0; i<N; ++i)
197 : {
198 2645 : prec(c, i) = std::stold(items[i]);
199 : }
200 :
201 115 : c += 1;
202 :
203 115 : }
204 5 : in_.close();
205 :
206 : // read in the linear coeffs
207 5 : in_.open(coeffs_f_name);
208 : unsigned n_=0;
209 : std::string l_;
210 350 : while (n_ < N*3) { in_.getline(l_); linear_coeffs.push_back(std::stod(l_)); n_ += 1; }
211 5 : linear_coeffs.resize(N*3);
212 :
213 5 : in_.close();
214 :
215 5 : }
216 :
217 :
218 :
219 1430 : double position_linear_proj::determinant(int n, const std::vector<std::vector<double>>* B)
220 : {
221 :
222 1430 : std::vector<std::vector<double>> A(n, std::vector<double>(n, 0));
223 : // make a copy first!
224 5720 : for(int i=0; i<n; ++i) {
225 17160 : for(int j=0; j<n; ++j) {A[i][j] = (*B)[i][j];}
226 : }
227 :
228 :
229 : // It calculates determinant of a matrix using partial pivoting.
230 :
231 : double det = 1;
232 :
233 : // Row operations for i = 0, ,,,, n - 2 (n-1 not needed)
234 4290 : for ( int i = 0; i < n - 1; i++ )
235 : {
236 : // Partial pivot: find row r below with largest element in column i
237 : int r = i;
238 2860 : double maxA = std::abs( A[i][i] );
239 7150 : for ( int k = i + 1; k < n; k++ )
240 : {
241 4290 : double val = std::abs( A[k][i] );
242 4290 : if ( val > maxA )
243 : {
244 : r = k;
245 : maxA = val;
246 : }
247 : }
248 2860 : if ( r != i )
249 : {
250 10010 : for ( int j = i; j < n; j++ ) std::swap( A[i][j], A[r][j] );
251 2860 : det = -det;
252 : }
253 :
254 : // Row operations to make upper-triangular
255 2860 : double pivot = A[i][i];
256 2860 : if (std::abs( pivot ) < SMALL ) return 0.0; // Singular matrix
257 :
258 7150 : for ( int r = i + 1; r < n; r++ ) // On lower rows
259 : {
260 4290 : double multiple = A[r][i] / pivot; // Multiple of row i to clear element in ith column
261 15730 : for ( int j = i; j < n; j++ ) A[r][j] -= multiple * A[i][j];
262 : }
263 2860 : det *= pivot; // Determinant is product of diagonal
264 : }
265 :
266 1430 : det *= A[n-1][n-1];
267 :
268 1430 : return det;
269 1430 : }
270 :
271 : // kabsch rotation
272 715 : void position_linear_proj::kabsch_rot_mat() {
273 :
274 : unsigned N=getNumberOfAtoms();
275 :
276 : Matrix<double> mobile_str_T(3,N);
277 : Matrix<double> prec_dot_ref_str(N,3);
278 : Matrix<double> correlation(3,3);
279 :
280 :
281 715 : transpose(mobile_str, mobile_str_T);
282 715 : mult(prec, ref_str, prec_dot_ref_str);
283 715 : mult(mobile_str_T, prec_dot_ref_str, correlation);
284 :
285 :
286 715 : int rw = correlation.nrows();
287 715 : int cl = correlation.ncols();
288 715 : int sz = rw*cl;
289 :
290 : // SVD part (taking from plu2/src/tools/Matrix.h: pseudoInvert function)
291 :
292 715 : std::vector<double> da(sz);
293 : unsigned k=0;
294 :
295 : // Transfer the matrix to the local array
296 9295 : for (int i=0; i<cl; ++i) for (int j=0; j<rw; ++j) da[k++]=static_cast<double>( correlation(j,i) ); // note! its [j][i] not [i][j]
297 :
298 715 : int nsv, info, nrows=rw, ncols=cl;
299 : if(rw>cl) {nsv=cl;} else {nsv=rw;}
300 :
301 : // Create some containers for stuff from single value decomposition
302 715 : std::vector<double> S(nsv);
303 715 : std::vector<double> U(nrows*nrows);
304 715 : std::vector<double> VT(ncols*ncols);
305 715 : std::vector<int> iwork(8*nsv);
306 :
307 : // This optimizes the size of the work array used in lapack singular value decomposition
308 715 : int lwork=-1;
309 715 : std::vector<double> work(1);
310 715 : plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
311 : //if(info!=0) return info;
312 715 : if(info!=0) log.printf("info:", info);
313 :
314 : // Retrieve correct sizes for work and rellocate
315 715 : lwork=(int) work[0]; work.resize(lwork);
316 :
317 : // This does the singular value decomposition
318 715 : plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
319 : //if(info!=0) return info;
320 715 : if(info!=0) log.printf("info:", info);
321 :
322 :
323 : // get U and VT in form of 2D vector (U_, VT_)
324 715 : std::vector<std::vector<double>> U_(nrows, std::vector<double>(nrows,0));
325 715 : std::vector<std::vector<double>> VT_(ncols, std::vector<double>(ncols,0));
326 :
327 : int c=0;
328 :
329 9295 : for(int i=0; i<nrows; ++i) { for(int j=0; j<nrows; ++j) { U_[j][i] = U[c]; c += 1;} } c = 0; // note! its [j][i] not [i][j]
330 9295 : for(int i=0; i<ncols; ++i) { for(int j=0; j<ncols; ++j) { VT_[j][i] = VT[c]; c += 1;} } c=0; // note! its [j][i] not [i][j]
331 :
332 :
333 : // calculate determinants
334 715 : double det_u = determinant(nrows, &U_);
335 715 : double det_vt = determinant(ncols, &VT_);
336 :
337 : // check!
338 1859 : if (det_u * det_vt < 0.0) { for(int i=0; i<nrows; ++i) {U_[i][nrows-1] *= -1;} }
339 :
340 :
341 : //Matrix<double> rotation(3,3);
342 715 : rotation.resize(3,3);
343 : Matrix<double> u(3,3), vt(3,3);
344 9295 : for(int i=0; i<3; ++i) { for(int j=0; j<3; ++j) { u(i,j)=U_[i][j]; vt(i,j)=VT_[i][j]; } }
345 :
346 : // get rotation matrix
347 715 : mult(u, vt, rotation);
348 :
349 1430 : }
350 :
351 :
352 : // calculates linear projection
353 715 : double position_linear_proj::cal_position_linear_proj() {
354 :
355 : unsigned N=getNumberOfAtoms();
356 :
357 : Matrix<double> rotated_obj(N,3);
358 : // rotate the object
359 715 : mult(mobile_str, rotation, rotated_obj);
360 :
361 : // compute the displacement
362 715 : std::vector<double> disp(N*3);
363 : unsigned c=0;
364 66495 : for(unsigned int i=0; i<N; ++i) { for(int j=0; j<3; ++j) { disp[c] = (rotated_obj(i,j)-ref_str(i,j)); c+=1;} }
365 :
366 : //double proj_val = dotProduct(disp, linear_coeffs);
367 : double proj_val = 0.0;
368 50050 : for(unsigned int i=0; i<N*3; ++i) { proj_val += (linear_coeffs[i]*disp[i]);}
369 :
370 715 : return proj_val;
371 : }
372 :
373 : // numeric gradient
374 25 : void position_linear_proj::numeric_grad() {
375 : // This function performs numerical derivative.
376 : unsigned N=getNumberOfAtoms();
377 :
378 : unsigned stride;
379 : unsigned rank;
380 25 : if(serial) {
381 : // when using components the parallelisation do not work
382 : stride=1;
383 : rank=0;
384 : } else {
385 25 : stride=comm.Get_size();
386 25 : rank=comm.Get_rank();
387 : }
388 :
389 255 : for(unsigned i=rank; i<N; i+=stride) {
390 920 : for (unsigned j=0; j<3; ++j) {
391 :
392 690 : mobile_str(i,j) += delta;
393 690 : kabsch_rot_mat();
394 690 : derv_numeric(i,j) = ((cal_position_linear_proj() - proj)/delta);
395 :
396 690 : mobile_str(i,j) -= delta;
397 : }
398 :
399 : }
400 :
401 25 : if(!serial) {
402 25 : if(!derv_numeric.getVector().empty()) comm.Sum(&derv_numeric(0,0), derv_numeric.getVector().size());
403 : }
404 :
405 :
406 600 : for(unsigned i=0; i<N; ++i) {
407 575 : Vector vi(derv_numeric(i,0), derv_numeric(i,1), derv_numeric(i,2) );
408 575 : setAtomsDerivatives(i, vi);
409 : }
410 :
411 : // clear the matrix (very important step!!)
412 25 : derv_numeric *= 0;
413 25 : }
414 :
415 :
416 : // calculator
417 25 : void position_linear_proj::calculate() {
418 :
419 25 : if(pbc) makeWhole();
420 : unsigned N=getNumberOfAtoms();
421 :
422 : mobile_str.resize(N,3);
423 :
424 : // load the mobile str
425 600 : for(unsigned int i=0; i<N; ++i) {
426 575 : Vector pos=getPosition(i); // const PLMD::Vector
427 2300 : for(unsigned j=0; j<3; ++j) {
428 1725 : mobile_str(i,j) = pos[j];
429 : }
430 : }
431 :
432 : // translating the structure to center of geometry
433 25 : double center_of_geometry[3]= {0.0, 0.0, 0.0};
434 :
435 600 : for(unsigned int i=0; i<N; ++i)
436 : {
437 575 : center_of_geometry[0] += mobile_str(i,0); center_of_geometry[1] += mobile_str(i,1); center_of_geometry[2] += mobile_str(i,2);
438 : }
439 :
440 600 : for(unsigned int i=0; i<N; ++i)
441 : {
442 2300 : for(int j=0; j<3; ++j) { mobile_str(i,j) -= (center_of_geometry[j]/N); }
443 : }
444 :
445 25 : kabsch_rot_mat();
446 25 : proj = cal_position_linear_proj();
447 :
448 25 : numeric_grad();
449 25 : setBoxDerivativesNoPbc();
450 25 : setValue(proj);
451 :
452 :
453 25 : }
454 :
455 : }
456 : }
457 :
458 :
459 :
|