LCOV - code coverage report
Current view: top level - sizeshape - mahadist.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage Lines: 143 152 94.1 %
Date: 2024-10-18 14:00:25 Functions: 8 10 80.0 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             : Copyright (c) 2024 by Glen Hocky, New York University on behalf of authors
       3             : 
       4             : The sizeshape module is free software: you can redistribute it and/or modify
       5             : it under the terms of the GNU Lesser General Public License as published by
       6             : the Free Software Foundation, either version 3 of the License, or
       7             : (at your option) any later version.
       8             : 
       9             : The sizeshape module is distributed in the hope that it will be useful,
      10             : but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             : GNU Lesser General Public License for more details.
      13             : 
      14             : You should have received a copy of the GNU Lesser General Public License
      15             : along with plumed.  If not, see <http://www.gnu.org/licenses/>.
      16             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      17             : #include "colvar/Colvar.h"
      18             : #include "core/ActionRegister.h"
      19             : #include "tools/Pbc.h"
      20             : #include "tools/File.h"           // Input and output from files 
      21             : #include "tools/Matrix.h"         // Linear Algebra operations
      22             : #include <sstream>
      23             : #include <cmath>
      24             : 
      25             : namespace PLMD {
      26             : namespace sizeshape {
      27             : 
      28             : //+PLUMEDOC sizeshapeMOD_COLVAR SIZESHAPE_POSITION_MAHA_DIST
      29             : /*
      30             : Calculates Mahalanobis distance of a current configuration from a  given reference configurational distribution in size-and-shape space.
      31             : 
      32             : The Mahalanobis distance is given as:
      33             : 
      34             : \f[
      35             : d(\mathbf{x}, \mathbf{\mu}, \mathbf{\Sigma}) = \sqrt{(\mathbf{x}-\mathbf{\mu})^T \mathbf{\Sigma}^{-1} (\mathbf{x}-\mathbf{\mu})}
      36             : \f]
      37             : 
      38             : Here \f$\mathbf{x}\f$ is the configuration at time t, \f$\mathbf{\mu}\f$ is the reference and \f$\mathbf{\Sigma}^{-1}\f$ is the \f$N \times N\f$ precision matrix.
      39             : 
      40             : Size-and-shape Gaussian Mixture Model (shapeGMM) \cite Heidi-shapeGMM-2022 is a probabilistic clustering technique that is used to perform structural clusteing on ensemble of molecular configurations and to obtain reference \f$(\mathbf{\mu})\f$ and precision \f$(\mathbf{\Sigma}^{-1})\f$ corresponding to each of the cluster centers. Please chcek out <a href="https://github.com/mccullaghlab/shapeGMMTorch">shapeGMMTorch-GitHub</a> and <a href="https://pypi.org/project/shapeGMMTorch/"> shapeGMMTorch-PyPI</a> for examples and informations on preforming shapeGMM clustering.
      41             : 
      42             : \par Examples
      43             : In the following example, a group is defined with atom indices of selected atoms and then Mahalanobis distance is calculated with respect to the given reference and precision. Each file is a space separated list of 3N floating point numbers.
      44             : 
      45             : \plumedfile
      46             : UNITS LENGTH=A TIME=ps ENERGY=kcal/mol
      47             : GROUP ATOMS=18,20,22,31,33,35,44,46,48,57,59,61,70,72,74,83,85,87,96,98,100,109,111 LABEL=ga_list
      48             : #SETTINGS AUXFILE=regtest/sizeshape/rt-mahadist/global_avg.txt
      49             : #SETTINGS AUXFILE=regtest/sizeshape/rt-mahadist/global_precision.txt
      50             : d: SIZESHAPE_POSITION_MAHA_DIST REFERENCE=global_avg.txt PRECISION=global_precision.txt GROUP=ga_list
      51             : PRINT ARG=d STRIDE=1 FILE=output FMT=%8.8f
      52             : \endplumedfile
      53             : 
      54             : */
      55             : //+ENDPLUMEDOC
      56             : 
      57             : class position_maha_dist : public Colvar {
      58             : 
      59             : private:
      60             :   bool pbc, squared;
      61             :   std::string prec_f_name;                      // precision file name
      62             :   std::string ref_f_name;                       // reference file name
      63             :   IFile in_;                                    // create an object of class IFile
      64             :   //Log out_;
      65             :   Matrix<double> ref_str;                 // coords of reference
      66             :   Matrix<double> mobile_str;              // coords of mobile
      67             :   Matrix<double> prec;                            // precision data
      68             :   Matrix<double> rotation;
      69             :   Matrix<double> derv_;
      70             :   Matrix<double> derv_numeric;
      71             :   void readinputs();                            // reads the input data
      72             :   double dist;
      73             :   std::vector<AtomNumber> atom_list;            // list of atoms
      74             :   const double SMALL = 1.0E-30;
      75             :   const double delta = 0.00001;
      76             : public:
      77             :   static void registerKeywords( Keywords& keys );
      78             :   explicit position_maha_dist(const ActionOptions&);
      79             :   double determinant(int n, const std::vector<std::vector<double>>* B);
      80             :   void kabsch_rot_mat();                // gives rotation matrix
      81             :   double cal_maha_dist();               // calculates the mahalanobis distance
      82             :   void grad_maha(double);               // calculates the gradient
      83             :   void numeric_maha();                  // calculates the numeric gradient
      84             :   // active methods:
      85             :   void calculate() override;
      86             : };
      87             : 
      88             : PLUMED_REGISTER_ACTION(position_maha_dist,"SIZESHAPE_POSITION_MAHA_DIST")
      89             : 
      90           3 : void position_maha_dist::registerKeywords( Keywords& keys ) {
      91           3 :   Colvar::registerKeywords( keys );
      92           6 :   keys.add("compulsory", "PRECISION", "Precision Matrix (inverse of covariance)" );
      93           6 :   keys.add("compulsory", "REFERENCE", "Reference structure.");
      94           6 :   keys.add("atoms","GROUP","The group of atoms being used");
      95           6 :   keys.addFlag("SQUARED",false,"Returns the square of distance.");
      96           3 :   keys.setValueDescription("the Mahalanobis distance between the instantaneous configuration and a given reference distribution in size-and-shape space");
      97           3 : }
      98             : 
      99             : // constructor function
     100           1 : position_maha_dist::position_maha_dist(const ActionOptions&ao):
     101             :   PLUMED_COLVAR_INIT(ao),
     102           1 :   pbc(true),
     103           1 :   squared(false),
     104           1 :   dist(0),
     105           2 :   prec_f_name(""),
     106           2 :   ref_f_name("")    // Note! no comma here in the last line.
     107             : {
     108           1 :   parseAtomList("GROUP",atom_list);
     109           1 :   parse("REFERENCE", ref_f_name);
     110           1 :   parse("PRECISION", prec_f_name);
     111             : 
     112           1 :   bool nopbc=!pbc;
     113           1 :   parseFlag("NOPBC",nopbc);
     114           1 :   parseFlag("SQUARED",squared);
     115           1 :   pbc=!nopbc;
     116             : 
     117           1 :   checkRead();
     118             : 
     119           1 :   log.printf("  of %u atoms\n",static_cast<unsigned>(atom_list.size()));
     120          24 :   for(unsigned int i=0; i<atom_list.size(); ++i) {
     121          23 :     log.printf("  %d", atom_list[i].serial());
     122             :   }
     123             : 
     124           1 :   if(squared)log.printf("\n chosen to use SQUARED option for SIZESHAPE_POSITION_MAHA_DIST\n");
     125             : 
     126           1 :   if(pbc) log.printf("\n using periodic boundary conditions\n");
     127           0 :   else log.printf("\n without periodic boundary conditions\n");
     128             : 
     129           2 :   addValueWithDerivatives(); setNotPeriodic();
     130             : 
     131           1 :   requestAtoms(atom_list);
     132             : 
     133             :   // call the readinputs() function here
     134           1 :   readinputs();
     135             : 
     136           1 : }
     137             : 
     138             : // read inputs function
     139           1 : void position_maha_dist::readinputs()
     140             : {
     141             :   unsigned N=getNumberOfAtoms();
     142             :   // read ref coords
     143           1 :   in_.open(ref_f_name);
     144             : 
     145             :   ref_str.resize(N,3); prec.resize(N,N);
     146             : 
     147             :   std::string line_, val_;
     148             :   unsigned c_=0;
     149             : 
     150          24 :   while (c_ < N)
     151             :   {
     152          23 :     in_.getline(line_);
     153             :     std::vector<std::string> items_;
     154          23 :     std::stringstream check_(line_);
     155             : 
     156          92 :     while(std::getline(check_, val_, ' ')) { items_.push_back(val_); }
     157          92 :     for(int i=0; i<3; ++i) { ref_str(c_,i) = std::stold(items_[i]); }
     158          23 :     c_ += 1;
     159          23 :   }
     160           1 :   in_.close();
     161             : 
     162             :   //read precision
     163           1 :   in_.open(prec_f_name);
     164             : 
     165             :   std::string line, val;
     166             :   unsigned int c = 0;
     167             : 
     168          24 :   while(c < N)
     169             :   {
     170          23 :     in_.getline(line);
     171             : 
     172             :     // vector for storing the objects
     173             :     std::vector<std::string> items;
     174             : 
     175             :     // stringstream helps to treat a string like an ifstream!
     176          23 :     std::stringstream check(line);
     177             : 
     178         552 :     while (std::getline(check, val, ' '))
     179             :     {
     180         529 :       items.push_back(val);
     181             :     }
     182             : 
     183         552 :     for(unsigned int i=0; i<N; ++i)
     184             :     {
     185         529 :       prec(c, i) = std::stold(items[i]);
     186             :     }
     187             : 
     188          23 :     c += 1;
     189             : 
     190          23 :   }
     191           1 :   in_.close();
     192           1 : }
     193             : 
     194             : 
     195          10 : double position_maha_dist::determinant(int n, const std::vector<std::vector<double>>* B)
     196             : {
     197             : 
     198          10 :   std::vector<std::vector<double>> A(n, std::vector<double>(n, 0));
     199             :   // make a copy first!
     200          40 :   for(int i=0; i<n; ++i) {
     201         120 :     for(int j=0; j<n; ++j) {A[i][j] = (*B)[i][j];}
     202             :   }
     203             : 
     204             : 
     205             :   //  It calculates determinant of a matrix using partial pivoting.
     206             : 
     207             :   double det = 1;
     208             : 
     209             :   // Row operations for i = 0, ,,,, n - 2 (n-1 not needed)
     210          30 :   for ( int i = 0; i < n - 1; i++ )
     211             :   {
     212             :     // Partial pivot: find row r below with largest element in column i
     213             :     int r = i;
     214          20 :     double maxA = std::abs( A[i][i] );
     215          50 :     for ( int k = i + 1; k < n; k++ )
     216             :     {
     217          30 :       double val = std::abs( A[k][i] );
     218          30 :       if ( val > maxA )
     219             :       {
     220             :         r = k;
     221             :         maxA = val;
     222             :       }
     223             :     }
     224          20 :     if ( r != i )
     225             :     {
     226          70 :       for ( int j = i; j < n; j++ ) std::swap( A[i][j], A[r][j] );
     227          20 :       det = -det;
     228             :     }
     229             : 
     230             :     // Row operations to make upper-triangular
     231          20 :     double pivot = A[i][i];
     232          20 :     if (std::abs( pivot ) < SMALL ) return 0.0;              // Singular matrix
     233             : 
     234          50 :     for ( int r = i + 1; r < n; r++ )                    // On lower rows
     235             :     {
     236          30 :       double multiple = A[r][i] / pivot;                // Multiple of row i to clear element in ith column
     237         110 :       for ( int j = i; j < n; j++ ) A[r][j] -= multiple * A[i][j];
     238             :     }
     239          20 :     det *= pivot;                                        // Determinant is product of diagonal
     240             :   }
     241             : 
     242          10 :   det *= A[n-1][n-1];
     243             : 
     244          10 :   return det;
     245          10 : }
     246             : 
     247             : // kabsch rotation
     248           5 : void position_maha_dist::kabsch_rot_mat() {
     249             : 
     250             :   unsigned N=getNumberOfAtoms();
     251             : 
     252             :   Matrix<double> mobile_str_T(3,N);
     253             :   Matrix<double> prec_dot_ref_str(N,3);
     254             :   Matrix<double> correlation(3,3);
     255             : 
     256             : 
     257           5 :   transpose(mobile_str, mobile_str_T);
     258           5 :   mult(prec, ref_str, prec_dot_ref_str);
     259           5 :   mult(mobile_str_T, prec_dot_ref_str, correlation);
     260             : 
     261             : 
     262           5 :   int rw = correlation.nrows();
     263           5 :   int cl = correlation.ncols();
     264           5 :   int sz = rw*cl;
     265             : 
     266             :   // SVD part (taking from plu2/src/tools/Matrix.h: pseudoInvert function)
     267             : 
     268           5 :   std::vector<double> da(sz);
     269             :   unsigned k=0;
     270             : 
     271             :   // Transfer the matrix to the local array
     272          65 :   for (int i=0; i<cl; ++i) for (int j=0; j<rw; ++j) da[k++]=static_cast<double>( correlation(j,i) ); // note! its [j][i] not [i][j]
     273             : 
     274           5 :   int nsv, info, nrows=rw, ncols=cl;
     275             :   if(rw>cl) {nsv=cl;} else {nsv=rw;}
     276             : 
     277             :   // Create some containers for stuff from single value decomposition
     278           5 :   std::vector<double> S(nsv);
     279           5 :   std::vector<double> U(nrows*nrows);
     280           5 :   std::vector<double> VT(ncols*ncols);
     281           5 :   std::vector<int> iwork(8*nsv);
     282             : 
     283             :   // This optimizes the size of the work array used in lapack singular value decomposition
     284           5 :   int lwork=-1;
     285           5 :   std::vector<double> work(1);
     286           5 :   plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
     287             :   //if(info!=0) return info;
     288           5 :   if(info!=0) log.printf("info:", info);
     289             : 
     290             :   // Retrieve correct sizes for work and rellocate
     291           5 :   lwork=(int) work[0]; work.resize(lwork);
     292             : 
     293             :   // This does the singular value decomposition
     294           5 :   plumed_lapack_dgesdd( "A", &nrows, &ncols, da.data(), &nrows, S.data(), U.data(), &nrows, VT.data(), &ncols, work.data(), &lwork, iwork.data(), &info );
     295             :   //if(info!=0) return info;
     296           5 :   if(info!=0) log.printf("info:", info);
     297             : 
     298             : 
     299             :   // get U and VT in form of 2D vector (U_, VT_)
     300           5 :   std::vector<std::vector<double>> U_(nrows, std::vector<double>(nrows,0));
     301           5 :   std::vector<std::vector<double>> VT_(ncols, std::vector<double>(ncols,0));
     302             : 
     303             :   int  c=0;
     304             : 
     305          65 :   for(int i=0; i<nrows; ++i) { for(int j=0; j<nrows; ++j) { U_[j][i] = U[c]; c += 1;} } c = 0; // note! its [j][i] not [i][j]
     306          65 :   for(int i=0; i<ncols; ++i) { for(int j=0; j<ncols; ++j) { VT_[j][i] = VT[c]; c += 1;} } c=0; // note! its [j][i] not [i][j]
     307             : 
     308             : 
     309             :   // calculate determinants
     310           5 :   double det_u = determinant(nrows, &U_);
     311           5 :   double det_vt = determinant(ncols, &VT_);
     312             : 
     313             :   // check!
     314          13 :   if (det_u * det_vt < 0.0) { for(int i=0; i<nrows; ++i) {U_[i][nrows-1] *= -1;} }
     315             : 
     316             : 
     317             :   //Matrix<double> rotation(3,3);
     318           5 :   rotation.resize(3,3);
     319             :   Matrix<double> u(3,3), vt(3,3);
     320          65 :   for(int i=0; i<3; ++i) { for(int j=0; j<3; ++j) { u(i,j)=U_[i][j]; vt(i,j)=VT_[i][j]; } }
     321             : 
     322             :   // get rotation matrix
     323           5 :   mult(u, vt, rotation);
     324             : 
     325          10 : }
     326             : 
     327             : 
     328             : // calculates maha dist
     329           5 : double position_maha_dist::cal_maha_dist() {
     330             : 
     331             :   unsigned N=getNumberOfAtoms();
     332             : 
     333             :   Matrix<double> rotated_obj(N,3);
     334             :   // rotate the object
     335           5 :   mult(mobile_str, rotation, rotated_obj);
     336             : 
     337             :   // compute the displacement
     338             :   Matrix<double> disp(N,3);
     339         465 :   for(unsigned int i=0; i<N; ++i) { for(unsigned int j=0; j<3; ++j) { disp(i,j) = (rotated_obj(i,j)-ref_str(i,j)); } }
     340             : 
     341             :   Matrix<double> prec_dot_disp(N,3);
     342             :   Matrix<double> disp_T(3,N);
     343             :   Matrix<double> out(3,3);
     344             : 
     345           5 :   mult(prec, disp, prec_dot_disp);
     346           5 :   transpose(disp, disp_T);
     347           5 :   mult(disp_T, prec_dot_disp, out);
     348             : 
     349             : 
     350             : 
     351             :   double maha_d=0.0;
     352          20 :   for(int i=0; i<3; ++i) { maha_d += out(i,i);}
     353             : 
     354           5 :   if (!squared) maha_d = std::sqrt(maha_d);
     355             : 
     356           5 :   return maha_d;
     357             : }
     358             : 
     359             : // gradient function
     360           5 : void position_maha_dist::grad_maha(double d) {
     361             : 
     362             :   unsigned N=getNumberOfAtoms();
     363             : 
     364           5 :   derv_.resize(N,3);
     365             : 
     366             :   Matrix<double> ref_str_rot_T(N,3);
     367             :   Matrix<double> rot_T(3,3);
     368             :   Matrix<double> diff_(N,3);
     369             : 
     370           5 :   transpose(rotation, rot_T);
     371           5 :   mult(ref_str, rot_T, ref_str_rot_T);
     372             : 
     373         465 :   for(unsigned i=0; i<N; ++i) { for(unsigned j=0; j<3; ++j) { diff_(i,j) = mobile_str(i,j) - ref_str_rot_T(i,j); } }
     374             : 
     375           5 :   mult(prec, diff_, derv_);
     376             : 
     377             :   //for(unsigned i=0; i<N; ++i){ for(unsigned j=0; j<3; ++j) {derv_(i,j) /= (N*d);} }  // dividing by N here!
     378         465 :   for(unsigned i=0; i<N; ++i) { for(unsigned j=0; j<3; ++j) { if (!squared) {derv_(i,j) /= d;} else {derv_(i,j) *= 2.0;}}}
     379             : 
     380             : 
     381           5 : }
     382             : 
     383             : 
     384             : // numeric gradient
     385           0 : void position_maha_dist::numeric_maha() {
     386             :   // This function performs numerical derivative.
     387             :   unsigned N=getNumberOfAtoms();
     388             :   derv_numeric.resize(N,3);
     389             : 
     390           0 :   for(unsigned int atom=0; atom<N; ++atom) {
     391           0 :     for(unsigned int j=0; j<3; ++j) {
     392           0 :       mobile_str(atom,j) += delta;
     393           0 :       kabsch_rot_mat();
     394           0 :       derv_numeric(atom,j) = (cal_maha_dist() - dist)/delta;
     395           0 :       mobile_str(atom,j) -= delta;
     396             :     }
     397             :   }
     398             : 
     399           0 : }
     400             : 
     401             : 
     402             : // calculator
     403           5 : void position_maha_dist::calculate() {
     404             : 
     405           5 :   if(pbc) makeWhole();
     406             :   unsigned N=getNumberOfAtoms();
     407             : 
     408             :   mobile_str.resize(N,3);
     409             : 
     410             :   // load the mobile str
     411         120 :   for(unsigned int i=0; i<N; ++i) {
     412         115 :     Vector pos=getPosition(i);  // const PLMD::Vector
     413         460 :     for(unsigned j=0; j<3; ++j) {
     414         345 :       mobile_str(i,j) = pos[j];
     415             :     }
     416             :   }
     417             : 
     418             :   // translating the structure to center of geometry
     419           5 :   double center_of_geometry[3]= {0.0, 0.0, 0.0};
     420             : 
     421         120 :   for(unsigned int i=0; i<N; ++i)
     422             :   {
     423         115 :     center_of_geometry[0] += mobile_str(i,0); center_of_geometry[1] += mobile_str(i,1); center_of_geometry[2] += mobile_str(i,2);
     424             :   }
     425             : 
     426         120 :   for(unsigned int i=0; i<N; ++i)
     427             :   {
     428         460 :     for(unsigned int j=0; j<3; ++j) { mobile_str(i,j) -= (center_of_geometry[j]/N); }
     429             :   }
     430             : 
     431           5 :   kabsch_rot_mat();
     432           5 :   dist = cal_maha_dist();
     433             : 
     434           5 :   grad_maha(dist);
     435             :   // set derivatives
     436         120 :   for(unsigned i=0; i<N; ++i) {
     437         115 :     Vector vi(derv_(i,0), derv_(i,1), derv_(i,2) );
     438         115 :     setAtomsDerivatives(i, vi);
     439             :   }
     440           5 :   setBoxDerivativesNoPbc();
     441           5 :   setValue(dist);
     442             : 
     443           5 : }
     444             : 
     445             : }
     446             : }
     447             : 
     448             : 
     449             : 

Generated by: LCOV version 1.16