LCOV - code coverage report
Current view: top level - lepton - CompiledExpression.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage (other modules) Lines: 227 231 98.3 %
Date: 2024-10-18 13:59:33 Functions: 19 19 100.0 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             :  * -------------------------------------------------------------------------- *
       3             :  *                                   Lepton                                   *
       4             :  * -------------------------------------------------------------------------- *
       5             :  * This is part of the Lepton expression parser originating from              *
       6             :  * Simbios, the NIH National Center for Physics-Based Simulation of           *
       7             :  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
       8             :  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
       9             :  *                                                                            *
      10             :  * Portions copyright (c) 2013-2016 Stanford University and the Authors.      *
      11             :  * Authors: Peter Eastman                                                     *
      12             :  * Contributors:                                                              *
      13             :  *                                                                            *
      14             :  * Permission is hereby granted, free of charge, to any person obtaining a    *
      15             :  * copy of this software and associated documentation files (the "Software"), *
      16             :  * to deal in the Software without restriction, including without limitation  *
      17             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
      18             :  * and/or sell copies of the Software, and to permit persons to whom the      *
      19             :  * Software is furnished to do so, subject to the following conditions:       *
      20             :  *                                                                            *
      21             :  * The above copyright notice and this permission notice shall be included in *
      22             :  * all copies or substantial portions of the Software.                        *
      23             :  *                                                                            *
      24             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
      25             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
      26             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
      27             :  * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
      28             :  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
      29             :  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
      30             :  * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
      31             :  * -------------------------------------------------------------------------- *
      32             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      33             : /* -------------------------------------------------------------------------- *
      34             :  *                                   lepton                                   *
      35             :  * -------------------------------------------------------------------------- *
      36             :  * This is part of the lepton expression parser originating from              *
      37             :  * Simbios, the NIH National Center for Physics-Based Simulation of           *
      38             :  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
      39             :  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
      40             :  *                                                                            *
      41             :  * Portions copyright (c) 2013-2019 Stanford University and the Authors.      *
      42             :  * Authors: Peter Eastman                                                     *
      43             :  * Contributors:                                                              *
      44             :  *                                                                            *
      45             :  * Permission is hereby granted, free of charge, to any person obtaining a    *
      46             :  * copy of this software and associated documentation files (the "Software"), *
      47             :  * to deal in the Software without restriction, including without limitation  *
      48             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
      49             :  * and/or sell copies of the Software, and to permit persons to whom the      *
      50             :  * Software is furnished to do so, subject to the following conditions:       *
      51             :  *                                                                            *
      52             :  * The above copyright notice and this permission notice shall be included in *
      53             :  * all copies or substantial portions of the Software.                        *
      54             :  *                                                                            *
      55             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
      56             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
      57             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
      58             :  * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
      59             :  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
      60             :  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
      61             :  * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
      62             :  * -------------------------------------------------------------------------- */
      63             : 
      64             : #include "CompiledExpression.h"
      65             : #include "Operation.h"
      66             : #include "ParsedExpression.h"
      67             : #ifdef __PLUMED_HAS_ASMJIT
      68             :     #include "asmjit/asmjit.h"
      69             : #endif
      70             : #include <utility>
      71             : 
      72             : namespace PLMD {
      73             : using namespace lepton;
      74             : using namespace std;
      75             : #ifdef __PLUMED_HAS_ASMJIT
      76             :     using namespace asmjit;
      77             : #endif
      78             : 
      79       82437 : bool lepton::useAsmJit() {
      80             : #ifdef __PLUMED_HAS_ASMJIT
      81         261 :   static const bool use=[](){
      82         261 :     if(auto s=std::getenv("PLUMED_USE_ASMJIT")) {
      83           2 :       auto ss=std::string(s);
      84           2 :       if(ss=="yes") return true;
      85           1 :       if(ss=="no") return false;
      86           0 :       throw Exception("PLUMED_USE_ASMJIT variable is set to " + ss + "; should be yes or no");
      87             :     }
      88             :     return true; // by default use asmjit
      89       82437 :   }();
      90       82437 :   return use;
      91             : #else
      92             :   return false;
      93             : #endif
      94             : }
      95             : 
      96       32789 : AsmJitRuntimePtr::AsmJitRuntimePtr()
      97             : #ifdef __PLUMED_HAS_ASMJIT
      98       32789 :   : ptr(useAsmJit()?new asmjit::JitRuntime:nullptr)
      99             : #endif
     100       32789 : {}
     101             : 
     102       32789 : AsmJitRuntimePtr::~AsmJitRuntimePtr()
     103             : {
     104             : #ifdef __PLUMED_HAS_ASMJIT
     105       32789 :   if(useAsmJit()) delete static_cast<asmjit::JitRuntime*>(ptr);
     106             : #endif
     107       32789 : }
     108             : 
     109       16363 : CompiledExpression::CompiledExpression() : jitCode(NULL) {
     110       16363 : }
     111             : 
     112       16338 : CompiledExpression::CompiledExpression(const ParsedExpression& expression) : jitCode(NULL) {
     113       16338 :     ParsedExpression expr = expression.optimize(); // Just in case it wasn't already optimized.
     114             :     vector<pair<ExpressionTreeNode, int> > temps;
     115       16338 :     compileExpression(expr.getRootNode(), temps);
     116             :     int maxArguments = 1;
     117       81216 :     for (int i = 0; i < (int) operation.size(); i++)
     118       64878 :         if (operation[i]->getNumArguments() > maxArguments)
     119       10259 :             maxArguments = operation[i]->getNumArguments();
     120       16338 :     argValues.resize(maxArguments);
     121             : #ifdef __PLUMED_HAS_ASMJIT
     122       16338 :     if(useAsmJit()) generateJitCode();
     123             : #endif
     124       32676 : }
     125             : 
     126       32789 : CompiledExpression::~CompiledExpression() {
     127      163116 :     for (int i = 0; i < (int) operation.size(); i++)
     128      130327 :         if (operation[i] != NULL)
     129      130327 :             delete operation[i];
     130       65578 : }
     131             : 
     132          88 : CompiledExpression::CompiledExpression(const CompiledExpression& expression) : jitCode(NULL) {
     133          88 :     *this = expression;
     134          88 : }
     135             : 
     136       16421 : CompiledExpression& CompiledExpression::operator=(const CompiledExpression& expression) {
     137       16421 :     arguments = expression.arguments;
     138       16421 :     target = expression.target;
     139             :     variableIndices = expression.variableIndices;
     140             :     variableNames = expression.variableNames;
     141       16421 :     workspace.resize(expression.workspace.size());
     142       16421 :     argValues.resize(expression.argValues.size());
     143       16421 :     operation.resize(expression.operation.size());
     144       81870 :     for (int i = 0; i < (int) operation.size(); i++)
     145       65449 :         operation[i] = expression.operation[i]->clone();
     146       16421 :     setVariableLocations(variablePointers);
     147       16421 :     return *this;
     148             : }
     149             : 
     150      103586 : void CompiledExpression::compileExpression(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
     151      103586 :     if (findTempIndex(node, temps) != -1)
     152       15809 :         return; // We have already processed a node identical to this one.
     153             :     
     154             :     // Process the child nodes.
     155             :     
     156             :     vector<int> args;
     157      175025 :     for (int i = 0; i < node.getChildren().size(); i++) {
     158       87248 :         compileExpression(node.getChildren()[i], temps);
     159       87248 :         args.push_back(findTempIndex(node.getChildren()[i], temps));
     160             :     }
     161             :     
     162             :     // Process this node.
     163             :     
     164       87777 :     if (node.getOperation().getId() == Operation::VARIABLE) {
     165       22899 :         variableIndices[node.getOperation().getName()] = (int) workspace.size();
     166       45798 :         variableNames.insert(node.getOperation().getName());
     167             :     }
     168             :     else {
     169       64878 :         int stepIndex = (int) arguments.size();
     170       64878 :         arguments.push_back(vector<int>());
     171       64878 :         target.push_back((int) workspace.size());
     172       64878 :         operation.push_back(node.getOperation().clone());
     173       64878 :         if (args.size() == 0)
     174        2276 :             arguments[stepIndex].push_back(0); // The value won't actually be used.  We just need something there.
     175             :         else {
     176             :             // If the arguments are sequential, we can just pass a pointer to the first one.
     177             :             
     178             :             bool sequential = true;
     179       87248 :             for (int i = 1; i < args.size(); i++)
     180       24646 :                 if (args[i] != args[i-1]+1)
     181             :                     sequential = false;
     182       62602 :             if (sequential)
     183       47946 :                 arguments[stepIndex].push_back(args[0]);
     184             :             else
     185       14656 :                 arguments[stepIndex] = args;
     186             :         }
     187             :     }
     188       87777 :     temps.push_back(make_pair(node, (int) workspace.size()));
     189       87777 :     workspace.push_back(0.0);
     190             : }
     191             : 
     192      190834 : int CompiledExpression::findTempIndex(const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, int> >& temps) {
     193     3637184 :     for (int i = 0; i < (int) temps.size(); i++)
     194     3549407 :         if (temps[i].first == node)
     195             :             return i;
     196             :     return -1;
     197             : }
     198             : 
     199        3308 : const set<string>& CompiledExpression::getVariables() const {
     200        3308 :     return variableNames;
     201             : }
     202             : 
     203       74764 : double& CompiledExpression::getVariableReference(const string& name) {
     204             :     map<string, double*>::iterator pointer = variablePointers.find(name);
     205       74764 :     if (pointer != variablePointers.end())
     206           0 :         return *pointer->second;
     207             :     map<string, int>::iterator index = variableIndices.find(name);
     208       74764 :     if (index == variableIndices.end())
     209       12418 :         throw Exception("getVariableReference: Unknown variable '"+name+"'");
     210       68555 :     return workspace[index->second];
     211             : }
     212             : 
     213       16421 : void CompiledExpression::setVariableLocations(map<string, double*>& variableLocations) {
     214             :   variablePointers = variableLocations;
     215       16421 :   static const bool asmjit=useAsmJit();
     216       16421 :   if(asmjit) {
     217             : #ifdef __PLUMED_HAS_ASMJIT
     218             :     // Rebuild the JIT code.
     219             :     
     220       16097 :     if (workspace.size() > 0)
     221       16097 :         generateJitCode();
     222             : #endif
     223             :   } else {
     224             :     // Make a list of all variables we will need to copy before evaluating the expression.
     225             :     
     226         324 :     variablesToCopy.clear();
     227         584 :     for (map<string, int>::const_iterator iter = variableIndices.begin(); iter != variableIndices.end(); ++iter) {
     228         260 :         map<string, double*>::iterator pointer = variablePointers.find(iter->first);
     229         260 :         if (pointer != variablePointers.end())
     230           0 :             variablesToCopy.push_back(make_pair(&workspace[iter->second], pointer->second));
     231             :     }
     232             :   }
     233       16421 : }
     234             : 
     235    68481826 : double CompiledExpression::evaluate() const {
     236    68481826 :     static const bool asmjit=useAsmJit();
     237             : #ifdef __PLUMED_HAS_ASMJIT
     238    68481826 :     if(asmjit) return ((double (*)()) jitCode)();
     239             : #endif
     240       16443 :     for (int i = 0; i < variablesToCopy.size(); i++)
     241           0 :         *variablesToCopy[i].first = *variablesToCopy[i].second;
     242             : 
     243             :     // Loop over the operations and evaluate each one.
     244             :     
     245       49474 :     for (int step = 0; step < operation.size(); step++) {
     246             :         const vector<int>& args = arguments[step];
     247       33031 :         if (args.size() == 1)
     248       30198 :             workspace[target[step]] = operation[step]->evaluate(&workspace[args[0]], dummyVariables);
     249             :         else {
     250        8905 :             for (int i = 0; i < args.size(); i++)
     251        6072 :                 argValues[i] = workspace[args[i]];
     252        2833 :             workspace[target[step]] = operation[step]->evaluate(&argValues[0], dummyVariables);
     253             :         }
     254             :     }
     255       16443 :     return workspace[workspace.size()-1];
     256             : }
     257             : 
     258             : #ifdef __PLUMED_HAS_ASMJIT
     259    11049185 : static double evaluateOperation(Operation* op, double* args) {
     260    11049185 :     static map<string, double> dummyVariables;
     261    11049185 :     return op->evaluate(args, dummyVariables);
     262             : }
     263             : 
     264             : static void generateSingleArgCall(X86Compiler& c, X86Xmm& dest, X86Xmm& arg, double (*function)(double));
     265             : static void generateTwoArgCall(X86Compiler& c, X86Xmm& dest, X86Xmm& arg1, X86Xmm& arg2, double (*function)(double, double));
     266             : 
     267       32111 : void CompiledExpression::generateJitCode() {
     268       32111 :     CodeHolder code;
     269             :     auto & runtime(*static_cast<asmjit::JitRuntime*>(runtimeptr.get()));
     270       32111 :     code.init(runtime.getCodeInfo());
     271       32111 :     X86Compiler c(&code);
     272       32111 :     c.addFunc(FuncSignature0<double>());
     273       32111 :     vector<X86Xmm> workspaceVar(workspace.size());
     274      206495 :     for (int i = 0; i < (int) workspaceVar.size(); i++)
     275      174384 :         workspaceVar[i] = c.newXmmSd();
     276             :     X86Gp argsPointer = c.newIntPtr();
     277       32111 :     c.mov(argsPointer, imm_ptr(&argValues[0]));
     278             :     
     279             :     // Load the arguments into variables.
     280             :     
     281       77472 :     for (set<string>::const_iterator iter = variableNames.begin(); iter != variableNames.end(); ++iter) {
     282             :         map<string, int>::iterator index = variableIndices.find(*iter);
     283             :         X86Gp variablePointer = c.newIntPtr();
     284       90722 :         c.mov(variablePointer, imm_ptr(&getVariableReference(index->first)));
     285       45361 :         c.movsd(workspaceVar[index->second], x86::ptr(variablePointer, 0, 0));
     286             :     }
     287             : 
     288             :     // Make a list of all constants that will be needed for evaluation.
     289             :     
     290       32111 :     vector<int> operationConstantIndex(operation.size(), -1);
     291      161134 :     for (int step = 0; step < (int) operation.size(); step++) {
     292             :         // Find the constant value (if any) used by this operation.
     293             :         
     294      129023 :         Operation& op = *operation[step];
     295             :         double value;
     296      129023 :         if (op.getId() == Operation::CONSTANT)
     297        4282 :             value = dynamic_cast<Operation::Constant&>(op).getValue();
     298      124741 :         else if (op.getId() == Operation::ADD_CONSTANT)
     299        7176 :             value = dynamic_cast<Operation::AddConstant&>(op).getValue();
     300      117565 :         else if (op.getId() == Operation::MULTIPLY_CONSTANT)
     301       27770 :             value = dynamic_cast<Operation::MultiplyConstant&>(op).getValue();
     302       89795 :         else if (op.getId() == Operation::RECIPROCAL)
     303        2902 :             value = 1.0;
     304       86893 :         else if (op.getId() == Operation::STEP)
     305        1246 :             value = 1.0;
     306       85647 :         else if (op.getId() == Operation::DELTA)
     307        1012 :             value = 1.0/0.0;
     308       84635 :         else if (op.getId() == Operation::NANDELTA)
     309          12 :             value = std::numeric_limits<double>::quiet_NaN();
     310             :         else
     311       84623 :             continue;
     312             :         
     313             :         // See if we already have a variable for this constant.
     314             :         
     315      501766 :         for (int i = 0; i < (int) constants.size(); i++)
     316      463824 :             if (value == constants[i]) {
     317        6458 :                 operationConstantIndex[step] = i;
     318        6458 :                 break;
     319             :             }
     320       44400 :         if (operationConstantIndex[step] == -1) {
     321       37942 :             operationConstantIndex[step] = constants.size();
     322       37942 :             constants.push_back(value);
     323             :         }
     324             :     }
     325             :     
     326             :     // Load constants into variables.
     327             :     
     328       32111 :     vector<X86Xmm> constantVar(constants.size());
     329       32111 :     if (constants.size() > 0) {
     330             :         X86Gp constantsPointer = c.newIntPtr();
     331       14892 :         c.mov(constantsPointer, imm_ptr(&constants[0]));
     332       52834 :         for (int i = 0; i < (int) constants.size(); i++) {
     333       37942 :             constantVar[i] = c.newXmmSd();
     334       37942 :             c.movsd(constantVar[i], x86::ptr(constantsPointer, 8*i, 0));
     335             :         }
     336             :     }
     337             :     
     338             :     // Evaluate the operations.
     339             :     
     340      161134 :     for (int step = 0; step < (int) operation.size(); step++) {
     341      129023 :         Operation& op = *operation[step];
     342      129023 :         vector<int> args = arguments[step];
     343      129023 :         if (args.size() == 1) {
     344             :             // One or more sequential arguments.  Fill out the list.
     345             :             
     346      119525 :             for (int i = 1; i < op.getNumArguments(); i++)
     347       19826 :                 args.push_back(args[0]+i);
     348             :         }
     349             :         
     350             :         // Generate instructions to execute this operation.
     351             :         
     352      129023 :         switch (op.getId()) {
     353        4282 :             case Operation::CONSTANT:
     354        4282 :                 c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
     355      128617 :                 break;
     356             :             case Operation::ADD:
     357       18990 :                 c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     358       18990 :                 c.addsd(workspaceVar[target[step]], workspaceVar[args[1]]);
     359             :                 break;
     360             :             case Operation::SUBTRACT:
     361        4648 :                 c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     362        4648 :                 c.subsd(workspaceVar[target[step]], workspaceVar[args[1]]);
     363             :                 break;
     364             :             case Operation::MULTIPLY:
     365       13404 :                 c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     366       13404 :                 c.mulsd(workspaceVar[target[step]], workspaceVar[args[1]]);
     367             :                 break;
     368             :             case Operation::DIVIDE:
     369       11900 :                 c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     370       11900 :                 c.divsd(workspaceVar[target[step]], workspaceVar[args[1]]);
     371             :                 break;
     372             :             case Operation::POWER:
     373           8 :                 generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], pow);
     374             :                 break;
     375        3016 :             case Operation::NEGATE:
     376        3016 :                 c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
     377        3016 :                 c.subsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     378             :                 break;
     379             :             case Operation::SQRT:
     380        2716 :                 c.sqrtsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     381             :                 break;
     382             :             case Operation::EXP:
     383        7164 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], exp);
     384             :                 break;
     385             :             case Operation::LOG:
     386        1476 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], log);
     387             :                 break;
     388             :             case Operation::SIN:
     389        1530 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sin);
     390             :                 break;
     391             :             case Operation::COS:
     392        2266 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cos);
     393             :                 break;
     394             :             case Operation::TAN:
     395           8 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tan);
     396             :                 break;
     397             :             case Operation::ASIN:
     398           4 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asin);
     399             :                 break;
     400             :             case Operation::ACOS:
     401          24 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acos);
     402             :                 break;
     403             :             case Operation::ATAN:
     404           4 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atan);
     405             :                 break;
     406             :             case Operation::ATAN2:
     407         104 :                 generateTwoArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], workspaceVar[args[1]], atan2);
     408             :                 break;
     409             :             case Operation::SINH:
     410           8 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], sinh);
     411             :                 break;
     412             :             case Operation::COSH:
     413           8 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], cosh);
     414             :                 break;
     415             :             case Operation::TANH:
     416          56 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], tanh);
     417             :                 break;
     418             :             case Operation::ASINH:
     419           4 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], asinh);
     420             :                 break;
     421             :             case Operation::ACOSH:
     422           4 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], acosh);
     423             :                 break;
     424             :             case Operation::ATANH:
     425           4 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], atanh);
     426             :                 break;
     427        1246 :             case Operation::STEP:
     428        1246 :                 c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
     429        1246 :                 c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(18)); // Comparison mode is _CMP_LE_OQ = 18
     430        1246 :                 c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
     431             :                 break;
     432        1012 :             case Operation::DELTA:
     433        1012 :                 c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
     434        1012 :                 c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16
     435        1012 :                 c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
     436             :                 break;
     437          12 :             case Operation::NANDELTA:
     438          12 :                 c.xorps(workspaceVar[target[step]], workspaceVar[target[step]]);
     439          12 :                 c.cmpsd(workspaceVar[target[step]], workspaceVar[args[0]], imm(16)); // Comparison mode is _CMP_EQ_OS = 16
     440          12 :                 c.andps(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
     441             :                 break;
     442             :             case Operation::SQUARE:
     443       16625 :                 c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     444       16625 :                 c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     445             :                 break;
     446             :             case Operation::CUBE:
     447         202 :                 c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     448         202 :                 c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     449         202 :                 c.mulsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     450             :                 break;
     451        2902 :             case Operation::RECIPROCAL:
     452        2902 :                 c.movsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
     453        2902 :                 c.divsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     454             :                 break;
     455             :             case Operation::ADD_CONSTANT:
     456        7176 :                 c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     457        7176 :                 c.addsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
     458             :                 break;
     459             :             case Operation::MULTIPLY_CONSTANT:
     460       27770 :                 c.movsd(workspaceVar[target[step]], workspaceVar[args[0]]);
     461       27770 :                 c.mulsd(workspaceVar[target[step]], constantVar[operationConstantIndex[step]]);
     462             :                 break;
     463             :             case Operation::ABS:
     464          36 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], fabs);
     465             :                 break;
     466             :             case Operation::FLOOR:
     467           4 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], floor);
     468             :                 break;
     469             :             case Operation::CEIL:
     470           4 :                 generateSingleArgCall(c, workspaceVar[target[step]], workspaceVar[args[0]], ceil);
     471             :                 break;
     472             :             default:
     473             :                 // Just invoke evaluateOperation().
     474             :                 
     475         924 :                 for (int i = 0; i < (int) args.size(); i++)
     476         518 :                     c.movsd(x86::ptr(argsPointer, 8*i, 0), workspaceVar[args[i]]);
     477             :                 X86Gp fn = c.newIntPtr();
     478         406 :                 c.mov(fn, imm_ptr((void*) evaluateOperation));
     479         406 :                 CCFuncCall* call = c.call(fn, FuncSignature2<double, Operation*, double*>());
     480         406 :                 call->setArg(0, imm_ptr(&op));
     481         406 :                 call->setArg(1, imm_ptr(&argValues[0]));
     482         406 :                 call->setRet(0, workspaceVar[target[step]]);
     483             :         }
     484             :     }
     485       32111 :     c.ret(workspaceVar[workspace.size()-1]);
     486       32111 :     c.endFunc();
     487       32111 :     c.finalize();
     488       32111 :     runtime.add(&jitCode, &code);
     489       32111 : }
     490             : 
     491       12604 : void generateSingleArgCall(X86Compiler& c, X86Xmm& dest, X86Xmm& arg, double (*function)(double)) {
     492             :     X86Gp fn = c.newIntPtr();
     493       12604 :     c.mov(fn, imm_ptr((void*) function));
     494       12604 :     CCFuncCall* call = c.call(fn, FuncSignature1<double, double>());
     495             :     call->setArg(0, arg);
     496             :     call->setRet(0, dest);
     497       12604 : }
     498             : 
     499         112 : void generateTwoArgCall(X86Compiler& c, X86Xmm& dest, X86Xmm& arg1, X86Xmm& arg2, double (*function)(double, double)) {
     500             :     X86Gp fn = c.newIntPtr();
     501         112 :     c.mov(fn, imm_ptr((void*) function));
     502         224 :     CCFuncCall* call = c.call(fn, FuncSignature2<double, double, double>());
     503             :     call->setArg(0, arg1);
     504             :     call->setArg(1, arg2);
     505             :     call->setRet(0, dest);
     506         112 : }
     507             : 
     508             : #endif
     509             : }

Generated by: LCOV version 1.16