Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : * -------------------------------------------------------------------------- *
3 : * Lepton *
4 : * -------------------------------------------------------------------------- *
5 : * This is part of the Lepton expression parser originating from *
6 : * Simbios, the NIH National Center for Physics-Based Simulation of *
7 : * Biological Structures at Stanford, funded under the NIH Roadmap for *
8 : * Medical Research, grant U54 GM072970. See https://simtk.org. *
9 : * *
10 : * Portions copyright (c) 2013-2016 Stanford University and the Authors. *
11 : * Authors: Peter Eastman *
12 : * Contributors: *
13 : * *
14 : * Permission is hereby granted, free of charge, to any person obtaining a *
15 : * copy of this software and associated documentation files (the "Software"), *
16 : * to deal in the Software without restriction, including without limitation *
17 : * the rights to use, copy, modify, merge, publish, distribute, sublicense, *
18 : * and/or sell copies of the Software, and to permit persons to whom the *
19 : * Software is furnished to do so, subject to the following conditions: *
20 : * *
21 : * The above copyright notice and this permission notice shall be included in *
22 : * all copies or substantial portions of the Software. *
23 : * *
24 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
25 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
26 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
27 : * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
28 : * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
29 : * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
30 : * USE OR OTHER DEALINGS IN THE SOFTWARE. *
31 : * -------------------------------------------------------------------------- *
32 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
33 : /* -------------------------------------------------------------------------- *
34 : * lepton *
35 : * -------------------------------------------------------------------------- *
36 : * This is part of the lepton expression parser originating from *
37 : * Simbios, the NIH National Center for Physics-Based Simulation of *
38 : * Biological Structures at Stanford, funded under the NIH Roadmap for *
39 : * Medical Research, grant U54 GM072970. See https://simtk.org. *
40 : * *
41 : * Portions copyright (c) 2009-2019 Stanford University and the Authors. *
42 : * Authors: Peter Eastman *
43 : * Contributors: *
44 : * *
45 : * Permission is hereby granted, free of charge, to any person obtaining a *
46 : * copy of this software and associated documentation files (the "Software"), *
47 : * to deal in the Software without restriction, including without limitation *
48 : * the rights to use, copy, modify, merge, publish, distribute, sublicense, *
49 : * and/or sell copies of the Software, and to permit persons to whom the *
50 : * Software is furnished to do so, subject to the following conditions: *
51 : * *
52 : * The above copyright notice and this permission notice shall be included in *
53 : * all copies or substantial portions of the Software. *
54 : * *
55 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
56 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
57 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
58 : * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
59 : * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
60 : * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
61 : * USE OR OTHER DEALINGS IN THE SOFTWARE. *
62 : * -------------------------------------------------------------------------- */
63 :
64 : #include "Parser.h"
65 : #include "CustomFunction.h"
66 : #include "Exception.h"
67 : #include "ExpressionTreeNode.h"
68 : #include "Operation.h"
69 : #include "ParsedExpression.h"
70 : #include <cctype>
71 : #include <iostream>
72 :
73 : namespace PLMD {
74 : using namespace lepton;
75 : using namespace std;
76 :
77 : namespace lepton {
78 :
79 : static const string Digits = "0123456789";
80 : static const string Operators = "+-*/^";
81 : static const bool LeftAssociative[] = {true, true, true, true, false};
82 : static const int Precedence[] = {0, 0, 1, 1, 3};
83 : static const Operation::Id OperationId[] = {Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER};
84 :
85 2016320 : const std::map<std::string, double> & Constants() {
86 : static const std::map<std::string, double> constants = {
87 : {"e", std::exp(1.0)},
88 : {"log2e", 1.0/std::log(2.0)},
89 : {"log10e", 1.0/std::log(10.0)},
90 : {"ln2", std::log(2.0)},
91 : {"ln10", std::log(10.0)},
92 : {"pi", 3.14159265358979323844},
93 : {"pi_2", 3.14159265358979323844*0.5},
94 : {"pi_4", 3.14159265358979323844*0.25},
95 : // {"1_pi", 1.0/pi},
96 : // {"2_pi", 2.0/pi},
97 : // {"2_sqrtpi", 2.0/std::sqrt(pi)},
98 : {"sqrt2", std::sqrt(2.0)},
99 : {"sqrt1_2", std::sqrt(0.5)}
100 2019433 : };
101 2016320 : return constants;
102 : }
103 :
104 : }
105 :
106 :
107 6558251 : class lepton::ParseToken {
108 : public:
109 : enum Type {Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace};
110 :
111 2103178 : ParseToken(string text, Type type) : text(text), type(type) {
112 : }
113 : const string& getText() const {
114 13 : return text;
115 : }
116 : Type getType() const {
117 71409 : return type;
118 : }
119 : private:
120 : string text;
121 : Type type;
122 : };
123 :
124 8 : string Parser::trim(const string& expression) {
125 : // Remove leading and trailing spaces.
126 :
127 : int start, end;
128 12 : for (start = 0; start < (int) expression.size() && isspace(expression[start]); start++)
129 : ;
130 8 : for (end = (int) expression.size()-1; end > start && isspace(expression[end]); end--)
131 : ;
132 8 : if (start == end && isspace(expression[end]))
133 0 : return "";
134 8 : return expression.substr(start, end-start+1);
135 : }
136 :
137 2103178 : ParseToken Parser::getNextToken(const string& expression, int start) {
138 2103178 : char c = expression[start];
139 2103178 : if (c == '(')
140 16730 : return ParseToken("(", ParseToken::LeftParen);
141 2094813 : if (c == ')')
142 27634 : return ParseToken(")", ParseToken::RightParen);
143 2080996 : if (c == ',')
144 294 : return ParseToken(",", ParseToken::Comma);
145 2080849 : if (Operators.find(c) != string::npos)
146 30784 : return ParseToken(string(1, c), ParseToken::Operator);
147 2050065 : if (isspace(c)) {
148 : // White space
149 :
150 28 : for (int pos = start+1; pos < (int) expression.size(); pos++) {
151 21 : if (!isspace(expression[pos]))
152 42 : return ParseToken(expression.substr(start, pos-start), ParseToken::Whitespace);
153 : }
154 14 : return ParseToken(expression.substr(start, string::npos), ParseToken::Whitespace);
155 : }
156 2050037 : if (c == '.' || Digits.find(c) != string::npos) {
157 : // A number
158 :
159 2023409 : bool foundDecimal = (c == '.');
160 : bool foundExp = false;
161 : int pos;
162 20148594 : for (pos = start+1; pos < (int) expression.size(); pos++) {
163 18139905 : c = expression[pos];
164 18139905 : if (Digits.find(c) != string::npos)
165 16108899 : continue;
166 2031006 : if (c == '.' && !foundDecimal) {
167 : foundDecimal = true;
168 2016100 : continue;
169 : }
170 14906 : if ((c == 'e' || c == 'E') && !foundExp) {
171 : foundExp = true;
172 186 : if (pos < (int) expression.size()-1 && (expression[pos+1] == '-' || expression[pos+1] == '+'))
173 : pos++;
174 186 : continue;
175 : }
176 : break;
177 : }
178 4046818 : return ParseToken(expression.substr(start, pos-start), ParseToken::Number);
179 : }
180 :
181 : // A variable, function, or left parenthesis
182 :
183 67826 : for (int pos = start; pos < (int) expression.size(); pos++) {
184 63513 : c = expression[pos];
185 63513 : if (c == '(')
186 10904 : return ParseToken(expression.substr(start, pos-start+1), ParseToken::Function);
187 58061 : if (Operators.find(c) != string::npos || c == ',' || c == ')' || isspace(c))
188 33726 : return ParseToken(expression.substr(start, pos-start), ParseToken::Variable);
189 : }
190 8626 : return ParseToken(expression.substr(start, string::npos), ParseToken::Variable);
191 : }
192 :
193 2016347 : vector<ParseToken> Parser::tokenize(const string& expression) {
194 : vector<ParseToken> tokens;
195 : int pos = 0;
196 4119525 : while (pos < (int) expression.size()) {
197 2103178 : ParseToken token = getNextToken(expression, pos);
198 2103178 : if (token.getType() != ParseToken::Whitespace)
199 2103150 : tokens.push_back(token);
200 2103178 : pos += (int) token.getText().size();
201 : }
202 2016347 : return tokens;
203 0 : }
204 :
205 2016343 : ParsedExpression Parser::parse(const string& expression) {
206 4032673 : return parse(expression, map<string, CustomFunction*>());
207 : }
208 :
209 2016343 : ParsedExpression Parser::parse(const string& expression, const map<string, CustomFunction*>& customFunctions) {
210 : try {
211 : // First split the expression into subexpressions.
212 :
213 2016343 : string primaryExpression = expression;
214 : vector<string> subexpressions;
215 : while (true) {
216 : string::size_type pos = primaryExpression.find_last_of(';');
217 2016347 : if (pos == string::npos)
218 : break;
219 8 : string sub = trim(primaryExpression.substr(pos+1));
220 4 : if (sub.size() > 0)
221 4 : subexpressions.push_back(sub);
222 8 : primaryExpression = primaryExpression.substr(0, pos);
223 4 : }
224 :
225 : // Parse the subexpressions.
226 :
227 : map<string, ExpressionTreeNode> subexpDefs;
228 2016347 : for (int i = 0; i < (int) subexpressions.size(); i++) {
229 4 : string::size_type equalsPos = subexpressions[i].find('=');
230 4 : if (equalsPos == string::npos)
231 0 : throw Exception("subexpression does not specify a name");
232 21 : string name = trim(subexpressions[i].substr(0, equalsPos));
233 4 : if (name.size() == 0)
234 0 : throw Exception("subexpression does not specify a name");
235 4 : vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos+1));
236 4 : int pos = 0;
237 4 : subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
238 4 : if (pos != tokens.size())
239 0 : throw Exception("unexpected text at end of subexpression: "+tokens[pos].getText());
240 4 : }
241 :
242 : // Now parse the primary expression.
243 :
244 2016343 : vector<ParseToken> tokens = tokenize(primaryExpression);
245 2016343 : int pos = 0;
246 2016343 : ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
247 2016343 : if (pos != tokens.size())
248 26 : throw Exception("unexpected text at end of expression: "+tokens[pos].getText());
249 4032660 : return ParsedExpression(result);
250 4032699 : }
251 13 : catch (Exception& ex) {
252 26 : throw Exception("Parse error in expression \""+expression+"\": "+ex.what());
253 13 : }
254 : }
255 :
256 2061095 : ExpressionTreeNode Parser::parsePrecedence(const vector<ParseToken>& tokens, int& pos, const map<string, CustomFunction*>& customFunctions,
257 : const map<string, ExpressionTreeNode>& subexpressionDefs, int precedence) {
258 2061095 : if (pos == tokens.size())
259 0 : throw Exception("unexpected end of expression");
260 :
261 : // Parse the next value (number, variable, function, parenthesized expression)
262 :
263 : ParseToken token = tokens[pos];
264 2061095 : ExpressionTreeNode result;
265 2061095 : if (token.getType() == ParseToken::Number) {
266 : double value;
267 4046818 : stringstream(token.getText()) >> value;
268 2023409 : result = ExpressionTreeNode(new Operation::Constant(value));
269 2023409 : pos++;
270 : }
271 37686 : else if (token.getType() == ParseToken::Variable) {
272 : map<string, ExpressionTreeNode>::const_iterator subexp = subexpressionDefs.find(token.getText());
273 21163 : if (subexp == subexpressionDefs.end()) {
274 21159 : Operation* op = new Operation::Variable(token.getText());
275 21159 : result = ExpressionTreeNode(op);
276 : }
277 : else
278 4 : result = subexp->second;
279 21163 : pos++;
280 : }
281 16523 : else if (token.getType() == ParseToken::LeftParen) {
282 8365 : pos++;
283 8365 : result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
284 8365 : if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
285 0 : throw Exception("unbalanced parentheses");
286 8365 : pos++;
287 : }
288 8158 : else if (token.getType() == ParseToken::Function) {
289 5452 : pos++;
290 : vector<ExpressionTreeNode> args;
291 : bool moreArgs;
292 5599 : do {
293 11198 : args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
294 5599 : moreArgs = (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Comma);
295 : if (moreArgs)
296 147 : pos++;
297 : } while (moreArgs);
298 5452 : if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen)
299 0 : throw Exception("unbalanced parentheses");
300 5452 : pos++;
301 5452 : Operation* op = getFunctionOperation(token.getText(), customFunctions);
302 : try {
303 5452 : result = ExpressionTreeNode(op, args);
304 : }
305 0 : catch (...) {
306 0 : delete op;
307 0 : throw;
308 0 : }
309 5452 : }
310 5412 : else if (token.getType() == ParseToken::Operator && token.getText() == "-") {
311 2706 : pos++;
312 2706 : ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
313 2706 : result = ExpressionTreeNode(new Operation::Negate(), toNegate);
314 2706 : }
315 : else
316 0 : throw Exception("unexpected token: "+token.getText());
317 :
318 : // Now deal with the next binary operator.
319 :
320 2089173 : while (pos < (int) tokens.size() && tokens[pos].getType() == ParseToken::Operator) {
321 : token = tokens[pos];
322 38470 : int opIndex = (int) Operators.find(token.getText());
323 38470 : int opPrecedence = Precedence[opIndex];
324 38470 : if (opPrecedence < precedence)
325 10392 : return result;
326 28078 : pos++;
327 28078 : ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence+1 : opPrecedence);
328 28078 : Operation* op = getOperatorOperation(token.getText());
329 : try {
330 28078 : result = ExpressionTreeNode(op, result, arg);
331 : }
332 0 : catch (...) {
333 0 : delete op;
334 0 : throw;
335 0 : }
336 28078 : }
337 : return result;
338 0 : }
339 :
340 28078 : Operation* Parser::getOperatorOperation(const std::string& name) {
341 28078 : switch (OperationId[Operators.find(name)]) {
342 4916 : case Operation::ADD:
343 4916 : return new Operation::Add();
344 4458 : case Operation::SUBTRACT:
345 4458 : return new Operation::Subtract();
346 10955 : case Operation::MULTIPLY:
347 10955 : return new Operation::Multiply();
348 4961 : case Operation::DIVIDE:
349 4961 : return new Operation::Divide();
350 2788 : case Operation::POWER:
351 2788 : return new Operation::Power();
352 0 : default:
353 0 : throw Exception("unknown operator");
354 : }
355 : }
356 :
357 5452 : Operation* Parser::getFunctionOperation(const std::string& name, const map<string, CustomFunction*>& customFunctions) {
358 :
359 196 : const static map<string, Operation::Id> opMap = []() {
360 : map<string, Operation::Id> opMap;
361 196 : opMap["sqrt"] = Operation::SQRT;
362 196 : opMap["exp"] = Operation::EXP;
363 196 : opMap["log"] = Operation::LOG;
364 196 : opMap["sin"] = Operation::SIN;
365 196 : opMap["cos"] = Operation::COS;
366 196 : opMap["sec"] = Operation::SEC;
367 196 : opMap["csc"] = Operation::CSC;
368 196 : opMap["tan"] = Operation::TAN;
369 196 : opMap["cot"] = Operation::COT;
370 196 : opMap["asin"] = Operation::ASIN;
371 196 : opMap["acos"] = Operation::ACOS;
372 196 : opMap["atan"] = Operation::ATAN;
373 196 : opMap["atan2"] = Operation::ATAN2;
374 196 : opMap["sinh"] = Operation::SINH;
375 196 : opMap["cosh"] = Operation::COSH;
376 196 : opMap["tanh"] = Operation::TANH;
377 196 : opMap["erf"] = Operation::ERF;
378 196 : opMap["erfc"] = Operation::ERFC;
379 196 : opMap["step"] = Operation::STEP;
380 196 : opMap["delta"] = Operation::DELTA;
381 196 : opMap["nandelta"] = Operation::NANDELTA;
382 196 : opMap["square"] = Operation::SQUARE;
383 196 : opMap["cube"] = Operation::CUBE;
384 196 : opMap["recip"] = Operation::RECIPROCAL;
385 196 : opMap["min"] = Operation::MIN;
386 196 : opMap["max"] = Operation::MAX;
387 196 : opMap["abs"] = Operation::ABS;
388 196 : opMap["floor"] = Operation::FLOOR;
389 196 : opMap["ceil"] = Operation::CEIL;
390 196 : opMap["select"] = Operation::SELECT;
391 196 : opMap["acot"] = Operation::ACOT;
392 196 : opMap["asec"] = Operation::ASEC;
393 196 : opMap["acsc"] = Operation::ACSC;
394 196 : opMap["coth"] = Operation::COTH;
395 196 : opMap["sech"] = Operation::SECH;
396 196 : opMap["csch"] = Operation::CSCH;
397 196 : opMap["asinh"] = Operation::ASINH;
398 196 : opMap["acosh"] = Operation::ACOSH;
399 196 : opMap["atanh"] = Operation::ATANH;
400 196 : opMap["acoth"] = Operation::ACOTH;
401 196 : opMap["asech"] = Operation::ASECH;
402 196 : opMap["acsch"] = Operation::ACSCH;
403 196 : opMap["atan2"] = Operation::ATAN2;
404 196 : return opMap;
405 5452 : }();
406 5452 : string trimmed = name.substr(0, name.size()-1);
407 :
408 : // First check custom functions.
409 :
410 : map<string, CustomFunction*>::const_iterator custom = customFunctions.find(trimmed);
411 5452 : if (custom != customFunctions.end())
412 0 : return new Operation::Custom(trimmed, custom->second->clone());
413 :
414 : // Now try standard functions.
415 :
416 : map<string, Operation::Id>::const_iterator iter = opMap.find(trimmed);
417 5452 : if (iter == opMap.end())
418 0 : throw Exception("unknown function: "+trimmed);
419 5452 : switch (iter->second) {
420 684 : case Operation::SQRT:
421 684 : return new Operation::Sqrt();
422 1802 : case Operation::EXP:
423 1802 : return new Operation::Exp();
424 569 : case Operation::LOG:
425 569 : return new Operation::Log();
426 229 : case Operation::SIN:
427 229 : return new Operation::Sin();
428 1503 : case Operation::COS:
429 1503 : return new Operation::Cos();
430 4 : case Operation::SEC:
431 4 : return new Operation::Sec();
432 4 : case Operation::CSC:
433 4 : return new Operation::Csc();
434 4 : case Operation::TAN:
435 4 : return new Operation::Tan();
436 4 : case Operation::COT:
437 4 : return new Operation::Cot();
438 4 : case Operation::ASIN:
439 4 : return new Operation::Asin();
440 16 : case Operation::ACOS:
441 16 : return new Operation::Acos();
442 4 : case Operation::ATAN:
443 4 : return new Operation::Atan();
444 83 : case Operation::ATAN2:
445 83 : return new Operation::Atan2();
446 4 : case Operation::SINH:
447 4 : return new Operation::Sinh();
448 4 : case Operation::COSH:
449 4 : return new Operation::Cosh();
450 15 : case Operation::TANH:
451 15 : return new Operation::Tanh();
452 4 : case Operation::ERF:
453 4 : return new Operation::Erf();
454 4 : case Operation::ERFC:
455 4 : return new Operation::Erfc();
456 382 : case Operation::STEP:
457 382 : return new Operation::Step();
458 4 : case Operation::DELTA:
459 4 : return new Operation::Delta();
460 4 : case Operation::NANDELTA:
461 4 : return new Operation::Nandelta();
462 4 : case Operation::SQUARE:
463 4 : return new Operation::Square();
464 4 : case Operation::CUBE:
465 4 : return new Operation::Cube();
466 4 : case Operation::RECIPROCAL:
467 4 : return new Operation::Reciprocal();
468 8 : case Operation::MIN:
469 8 : return new Operation::Min();
470 8 : case Operation::MAX:
471 8 : return new Operation::Max();
472 13 : case Operation::ABS:
473 13 : return new Operation::Abs();
474 4 : case Operation::FLOOR:
475 4 : return new Operation::Floor();
476 4 : case Operation::CEIL:
477 4 : return new Operation::Ceil();
478 24 : case Operation::SELECT:
479 24 : return new Operation::Select();
480 4 : case Operation::ACOT:
481 4 : return new Operation::Acot();
482 4 : case Operation::ASEC:
483 4 : return new Operation::Asec();
484 4 : case Operation::ACSC:
485 4 : return new Operation::Acsc();
486 4 : case Operation::COTH:
487 4 : return new Operation::Coth();
488 4 : case Operation::SECH:
489 4 : return new Operation::Sech();
490 4 : case Operation::CSCH:
491 4 : return new Operation::Csch();
492 4 : case Operation::ASINH:
493 4 : return new Operation::Asinh();
494 4 : case Operation::ACOSH:
495 4 : return new Operation::Acosh();
496 4 : case Operation::ATANH:
497 4 : return new Operation::Atanh();
498 4 : case Operation::ACOTH:
499 4 : return new Operation::Acoth();
500 4 : case Operation::ASECH:
501 4 : return new Operation::Asech();
502 4 : case Operation::ACSCH:
503 4 : return new Operation::Acsch();
504 0 : default:
505 0 : throw Exception("unknown function");
506 : }
507 : }
508 : }
|