/*
 * Copyright (c) 2008 Andrew Garrett.
 * Copyright (c) 2008 River Tarnell
 * Derived from public domain code contributed by Victor Vasiliev.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely. This software is provided 'as-is', without any express or
 * implied warranty.
 */

#ifndef EXPRESSOR_H
#define EXPRESSOR_H

/*
 * NOTE(review): in this copy of the file the header names of the unnamed
 * #include directives below, and the template parameter/argument lists
 * throughout the file, are missing.  They must be restored from the
 * original source before this header can compile.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "aftypes.h"
#include "afstring.h"
#include "fray.h"

namespace afp {

/* Forward declaration; the grammar is defined further down in this file. */
template struct parser_grammar;

/*
 * The user-facing expression evaluator.  It owns one parser_grammar and
 * lets the caller register variables and functions, evaluate an
 * expression, or dump the parse tree of an expression as XML.
 * Instances cannot be copied (boost::noncopyable).
 */
template struct basic_expressor : boost::noncopyable {
	/*
	 * Type of a user-supplied function: a boost::function that
	 * receives its arguments in a std::vector, passed by value.
	 */
	typedef boost::function (std::vector >)> func_t;

	/*
	 * The constructor allocates the grammar and registers the built-in
	 * variables "true" and "false" and the cast functions "int",
	 * "string" and "float"; the destructor deletes the grammar.
	 */
	basic_expressor();
	~basic_expressor();

	/*
	 * Parse and evaluate expr, returning its value.  Throws
	 * parse_error if the whole expression cannot be parsed.
	 */
	basic_datum evaluate(basic_fray const &expr) const;

	/*
	 * Parse expr and write its syntax tree to strm as XML.  Throws
	 * parse_error if the whole expression cannot be parsed.
	 */
	void print_xml(std::ostream &strm, basic_fray const &expr) const;

	/* Register a variable / a function under the given name. */
	void add_variable(basic_fray const &name, basic_datum const &value);
	void add_function(basic_fray const &name, func_t value);

	/*
	 * Remove every registered function and/or variable.  The symbol
	 * tables are replaced by empty ones, so the built-ins registered
	 * by the constructor are removed as well.
	 */
	void clear();
	void clear_functions();
	void clear_variables();

private:
	/* Owned: allocated in the constructor, deleted in the destructor. */
	parser_grammar *grammar_;
};

/*
 * Two ready-made instantiations.  NOTE(review): the character types are
 * not visible in this copy; presumably a narrow and a 32-bit character
 * type respectively -- confirm against the original source.
 */
typedef basic_expressor expressor;
typedef basic_expressor u32expressor;

/*
 * NOTE(review): this using-directive sits at namespace scope in a header,
 * so every file including this header gets all boost::spirit names
 * visible inside namespace afp.  The code below relies on it for
 * unqualified names such as grammar, symbols, rule and ast_parse.
 */
using namespace boost::spirit;

/*
 * ABUSEFILTER EXPRESSION PARSER
 * =============================
 *
 * This is the basic expression parser.  It doesn't contain any AF logic
 * itself, but rather presents an interface for the user to add custom
 * functions and variables.
 *
 * The interface to the parser is the 'expressor' class.  Use it like this:
 *
 *   expressor e;
 *   e.add_variable("ONE", 1);
 *   e.evaluate("ONE + 2");       -- returns 3
 *
 * Custom functions are stored as basic_expressor::func_t: they receive
 * their arguments in a std::vector and return a single value.
 *
 * NOTE(review): the original text between the function prototype and the
 * comparison operators is missing from this copy of the file.  Only the
 * end of the operator list survives:
 *
 *   a >= b
 *   a === b    returns true if a==b and both are the same type
 *   a !== b    return true if a != b or they are different types
 *
 * The full set of operators is registered in parser_grammar's constructor.
 *
 * The parser uses afp::datum for its variables.
This means it supports * strings, ints and floats, with automatic conversion between types. */ struct parse_error : std::runtime_error { parse_error(char const *what) : std::runtime_error(what) {} }; namespace { template int match(char const *, char const *); template basic_datum f_in(basic_datum const &a, basic_datum const &b) { basic_fray sa = a.toString(), sb = b.toString(); return basic_datum::from_int(std::search(sb.begin(), sb.end(), sa.begin(), sa.end()) != sb.end()); } template basic_datum f_like(basic_datum const &str, basic_datum const &pattern) { return basic_datum::from_int(match(pattern.toString().c_str(), str.toString().c_str())); } template basic_datum f_regex(basic_datum const &str, basic_datum const &pattern) { basic_fray f = pattern.toString(); boost::u32regex r = boost::make_u32regex(f.begin(), f.end(), boost::regex_constants::perl); basic_fray s = str.toString(); return basic_datum::from_int(boost::u32regex_match( s.begin(), s.end(), r)); } template basic_datum f_int(std::vector > const &args) { if (args.size() != 1) throw parse_error("wrong number of arguments to int() (expected 1)"); return basic_datum::from_int(args[0].toInt()); } template basic_datum f_string(std::vector > const &args) { if (args.size() != 1) throw parse_error("wrong number of arguments to string() (expected 1)"); return basic_datum::from_string(args[0].toString()); } template basic_datum f_float(std::vector > const &args) { if (args.size() != 1) throw parse_error("wrong number of arguments to float() (expected 1)"); return basic_datum::from_double(args[0].toFloat()); } } // anonymous namespace /* * The grammar itself. 
*/ template struct parser_grammar : public grammar > { static const int id_value = 1; static const int id_variable = 2; static const int id_basic = 3; static const int id_bool_expr = 4; static const int id_ord_expr = 5; static const int id_eq_expr = 6; static const int id_pow_expr = 7; static const int id_mult_expr = 8; static const int id_plus_expr = 9; static const int id_in_expr = 10; static const int id_function = 12; static const int id_tern_expr = 13; static const int id_string = 14; /* User-defined variables. */ symbols, charT > variables; void add_variable(basic_fray const &name, basic_datum const &value) { variables.add(name.c_str(), value); } /* User-defined functions. */ symbols (std::vector >)>, charT > functions; void add_function( basic_fray const &name, boost::function (std::vector >)> func) { functions.add(name.c_str(), func); } symbols eq_opers, ord_opers, plus_opers, mult_opers, in_opers, bool_opers; parser_grammar() { eq_opers.add("=", 0); eq_opers.add("==", 0); eq_opers.add("===", 0); eq_opers.add("!=", 0); eq_opers.add("!==", 0); eq_opers.add("/=", 0); ord_opers.add("<", 0); ord_opers.add("<=", 0); ord_opers.add(">", 0); ord_opers.add(">=", 0); plus_opers.add("+", 0); plus_opers.add("-", 0); mult_opers.add("*", 0); mult_opers.add("/", 0); mult_opers.add("%", 0); bool_opers.add("&", 0); bool_opers.add("|", 0); bool_opers.add("^", 0); in_opers.add("in", 0); in_opers.add("contains", 0); in_opers.add("matches", 0); in_opers.add("like", 0); in_opers.add("rlike", 0); in_opers.add("regex", 0); } template struct definition { parser_grammar const &self_; definition(parser_grammar const &self) : self_(self) { /* * A literal value. Either a string, a floating * pointer number or an integer. 
*/ value = strict_real_p | as_lower_d[ leaf_node_d[ oct_p >> 'o' | hex_p >> 'x' | bin_p >> 'b' | int_p ] ] | string ; /* * config_p can't be used here, because it will rewrite * *(c_escape_ch_p[x]) into (*c_escape_ch_p)[x] */ string = inner_node_d[ '"' >> leaf_node_d[ *(lex_escape_ch_p - '"') ] >> '"' ] ; /* * A variable. If the variable is found in the * user-supplied variable list, we use that. * Otherwise, unknown variables (containing uppercase * letters and underscore only) are returned as the * empty string. */ variable = longest_d[ self.variables | leaf_node_d[ (+ (upper_p | '_') ) ] ] ; /* * A function call: func([arg[, arg...]]). */ function = ( root_node_d[self.functions] >> inner_node_d[ '(' >> ( tern_expr % discard_node_d[ch_p(',')] ) >> ')' ] ) ; /* * A basic atomic value. Either a variable, function * or literal, or a negated expression !a, or a * parenthesised expression (a). */ basic = value | variable | function | inner_node_d[ '(' >> tern_expr >> ')' ] | root_node_d[ch_p('!')] >> tern_expr | root_node_d[ch_p('+')] >> tern_expr | root_node_d[ch_p('-')] >> tern_expr ; /* * "a in b" operator */ in_expr = basic >> *( root_node_d[ self.in_opers ] >> basic ) ; /* * power-of. This is right-associative. */ pow_expr = in_expr >> !( root_node_d[ str_p("**") ] >> pow_expr ) ; /* * Multiplication and operators with the same * precedence. */ mult_expr = pow_expr >> *( root_node_d[ self.mult_opers ] >> pow_expr ) ; /* * Additional and operators with the same precedence. */ plus_expr = mult_expr >> *( root_node_d[ self.plus_opers ] >> mult_expr ) ; /* * Ordinal comparisons and operators with the same * precedence. */ ord_expr = plus_expr >> *( root_node_d[ self.ord_opers ] >> plus_expr ) ; /* * Equality comparisons. */ eq_expr = ord_expr >> *( root_node_d[ self.eq_opers ] >> ord_expr ) ; /* * Boolean expressions. */ bool_expr = eq_expr >> *( root_node_d[ self.bool_opers ] >> eq_expr ) ; /* * The ternary operator. Notice this is * right-associative: a ? b ? 
c : d : e * is supported. */ tern_expr = bool_expr >> !( root_node_d[ch_p('?')] >> tern_expr >> discard_node_d[ch_p(':')] >> tern_expr ) ; } rule, parser_tag > const &start() const { return tern_expr; } rule, parser_tag > value; rule, parser_tag > variable; rule, parser_tag > basic; rule, parser_tag > bool_expr; rule, parser_tag > ord_expr; rule, parser_tag > eq_expr; rule, parser_tag > pow_expr; rule, parser_tag > mult_expr; rule, parser_tag > plus_expr; rule, parser_tag > in_expr; rule, parser_tag > function; rule, parser_tag > tern_expr; rule, parser_tag > string; }; }; template basic_expressor::basic_expressor() : grammar_(new parser_grammar) { /* * We provide a couple of standard variables everyone wants. */ add_variable(make_astring("true"), afp::basic_datum::from_int(true)); add_variable(make_astring("false"), afp::basic_datum::from_int(false)); /* * The cast functions. */ add_function(make_astring("int"), &f_int); add_function(make_astring("string"), &f_string); add_function(make_astring("float"), &f_float); } template basic_expressor::~basic_expressor() { delete grammar_; } template struct ast_evaluator { typedef typename tree_match::const_iterator>::tree_iterator astiter_t; parser_grammar const &grammar_; ast_evaluator(parser_grammar const &grammar) : grammar_(grammar) { } basic_datum ast_eval_in(charT oper, astiter_t const &a, astiter_t const &b) { switch (oper) { case 'i': return f_in(tree_eval(a), tree_eval(b)); case 'c': return f_in(tree_eval(b), tree_eval(a)); case 'l': case 'm': return f_like(tree_eval(a), tree_eval(b)); case 'r': return f_regex(tree_eval(a), tree_eval(b)); default: abort(); } } basic_datum ast_eval_bool(charT oper, astiter_t const &a, astiter_t const &b) { switch (oper) { case '&': if (tree_eval(a).toBool()) if (tree_eval(b).toBool()) return basic_datum::from_int(1); return basic_datum::from_int(0); case '|': if (tree_eval(a).toBool()) return basic_datum::from_int(1); else if (tree_eval(b).toBool()) return basic_datum::from_int(1); 
return basic_datum::from_int(0); case '^': { int va = tree_eval(a).toBool(), vb = tree_eval(b).toBool(); if ((va && !vb) || (!va && vb)) return basic_datum::from_int(1); return basic_datum::from_int(0); } } abort(); } basic_datum ast_eval_plus(charT oper, astiter_t const &a, astiter_t const &b) { switch (oper) { case '+': return tree_eval(a) + tree_eval(b); case '-': return tree_eval(a) - tree_eval(b); default: abort(); } } basic_datum ast_eval_mult(charT oper, astiter_t const &a, astiter_t const &b) { switch (oper) { case '*': return tree_eval(a) * tree_eval(b); case '/': return tree_eval(a) / tree_eval(b); case '%': return tree_eval(a) % tree_eval(b); default: abort(); } } basic_datum ast_eval_ord(basic_fray const &oper, astiter_t const &a, astiter_t const &b) { switch (oper.size()) { case 1: switch (oper[0]) { case '<': return basic_datum::from_int(tree_eval(a) < tree_eval(b)); case '>': return basic_datum::from_int(tree_eval(a) > tree_eval(b)); default: abort(); } case 2: switch(oper[0]) { case '<': return basic_datum::from_int(tree_eval(a) <= tree_eval(b)); case '>': return basic_datum::from_int(tree_eval(a) >= tree_eval(b)); default: abort(); } default: abort(); } } basic_datum ast_eval_eq(basic_fray const &oper, astiter_t const &a, astiter_t const &b) { switch (oper.size()) { case 1: /* = */ return basic_datum::from_int(tree_eval(a) == tree_eval(b)); case 2: /* != /= == */ switch (oper[0]) { case '!': case '/': return basic_datum::from_int(tree_eval(a) != tree_eval(b)); case '=': return basic_datum::from_int(tree_eval(a) == tree_eval(b)); default: abort(); } case 3: /* === !== */ switch (oper[0]) { case '=': return basic_datum::from_int(tree_eval(a).compare_with_type(tree_eval(b))); case '!': return basic_datum::from_int(!tree_eval(a).compare_with_type(tree_eval(b))); default: abort(); } default: abort(); } } basic_datum ast_eval_pow(astiter_t const &a, astiter_t const &b) { return pow(tree_eval(a), tree_eval(b)); } basic_datum ast_eval_string(basic_fray 
const &s) { std::vector ret; ret.reserve(int(s.size() * 1.2)); for (std::size_t i = 0, end = s.size(); i < end; ++i) { if (s[i] != '\\') { ret.push_back(s[i]); continue; } if (i+1 == end) break; switch (s[i + 1]) { case 't': ret.push_back('\t'); break; case 'n': ret.push_back('\n'); break; case 'r': ret.push_back('\r'); break; case 'b': ret.push_back('\b'); break; case 'a': ret.push_back('\a'); break; case 'f': ret.push_back('\f'); break; case 'v': ret.push_back('\v'); break; default: ret.push_back(s[i + 1]); break; } i++; } return basic_datum::from_string(basic_fray(ret.begin(), ret.end())); } basic_datum ast_eval_tern(astiter_t const &cond, astiter_t const &iftrue, astiter_t const &iffalse) { if (tree_eval(cond).toBool()) return tree_eval(iftrue); else return tree_eval(iffalse); } basic_datum ast_eval_num(basic_fray const &s) { if (s.find('.') != basic_fray::npos) return basic_datum::from_double(std::strtod(make_u8fray(s).c_str(), 0)); int base; switch (s[s.size() - 1]) { case 'x': base = 16; break; case 'o': base = 8; break; case 'b': base = 2; break; default: base = 10; break; } return basic_datum::from_int(std::strtol(make_u8fray(s).c_str(), 0, base)); } basic_datum ast_eval_function(basic_fray const &f, astiter_t abegin, astiter_t const &aend) { std::vector > args; for (; abegin != aend; ++abegin) args.push_back(tree_eval(abegin)); boost::function (std::vector >)> *fptr; if ((fptr = find(grammar_.functions, f.c_str())) == NULL) return basic_datum::from_string(basic_fray()); else return (*fptr)(args); } basic_datum ast_eval_basic(charT op, astiter_t const &val) { switch (op) { case '!': if (tree_eval(val).toBool()) return basic_datum::from_int(0); else return basic_datum::from_int(1); case '-': return -tree_eval(val); case '+': return tree_eval(val); default: abort(); } } basic_datum ast_eval_variable(basic_fray const &v) { basic_datum const *var; if ((var = find(grammar_.variables, v.c_str())) == NULL) return basic_datum::from_string(basic_fray()); else 
return *var; } basic_datum tree_eval(astiter_t const &i) { switch (i->value.id().to_long()) { case parser_grammar::id_value: return ast_eval_num( basic_fray(i->value.begin(), i->value.end())); case parser_grammar::id_string: return ast_eval_string(basic_fray(i->value.begin(), i->value.end())); case parser_grammar::id_basic: return ast_eval_basic(*i->value.begin(), i->children.begin()); case parser_grammar::id_variable: return ast_eval_variable(basic_fray(i->value.begin(), i->value.end())); case parser_grammar::id_function: return ast_eval_function( basic_fray(i->value.begin(), i->value.end()), i->children.begin(), i->children.end()); case parser_grammar::id_in_expr: return ast_eval_in(*i->value.begin(), i->children.begin(), i->children.begin() + 1); case parser_grammar::id_bool_expr: return ast_eval_bool(*i->value.begin(), i->children.begin(), i->children.begin() + 1); case parser_grammar::id_plus_expr: return ast_eval_plus(*i->value.begin(), i->children.begin(), i->children.begin() + 1); case parser_grammar::id_mult_expr: return ast_eval_mult(*i->value.begin(), i->children.begin(), i->children.begin() + 1); case parser_grammar::id_pow_expr: return ast_eval_pow(i->children.begin(), i->children.begin() + 1); case parser_grammar::id_ord_expr: return ast_eval_ord( basic_fray(i->value.begin(), i->value.end()), i->children.begin(), i->children.begin() + 1); case parser_grammar::id_eq_expr: return ast_eval_eq( basic_fray(i->value.begin(), i->value.end()), i->children.begin(), i->children.begin() + 1); case parser_grammar::id_tern_expr: return ast_eval_tern( i->children.begin(), i->children.begin() + 1, i->children.begin() + 2); default: std::cerr << "warning: unmatched expr type " << i->value.id().to_long() << "\n"; return basic_datum::from_int(0); } } }; /* * The user interface to evaluate an expression. It returns the result, or * throws an exception if an error occurs. 
*/ template basic_datum basic_expressor::evaluate(basic_fray const &filter) const { using namespace boost::spirit; typedef typename basic_fray::const_iterator iterator_t; basic_datum ret; tree_parse_info info = ast_parse(filter.begin(), filter.end(), *grammar_, +chset<>("\n\t ") | comment_p("/*", "*/")); if (info.full) { ast_evaluator ae(*grammar_); return ae.tree_eval(info.trees.begin()); } else { //std::cerr << "stopped at: [" << basic_fray(info.stop, filter.end()) << "]\n"; throw parse_error("parsing failed"); } } template void basic_expressor::print_xml(std::ostream &strm, basic_fray const &filter) const { using namespace boost::spirit; typedef typename basic_fray::const_iterator iterator_t; tree_parse_info info = ast_parse(filter.begin(), filter.end(), *grammar_, +chset<>("\n\t ") | comment_p("/*", "*/")); if (info.full) { std::map rule_names; rule_names[parser_grammar::id_value] = "value"; rule_names[parser_grammar::id_variable] = "variable"; rule_names[parser_grammar::id_basic] = "basic"; rule_names[parser_grammar::id_bool_expr] = "bool_expr"; rule_names[parser_grammar::id_ord_expr] = "ord_expr"; rule_names[parser_grammar::id_eq_expr] = "eq_expr"; rule_names[parser_grammar::id_pow_expr] = "pow_expr"; rule_names[parser_grammar::id_mult_expr] = "mult_expr"; rule_names[parser_grammar::id_plus_expr] = "plus_expr"; rule_names[parser_grammar::id_in_expr] = "in_expr"; rule_names[parser_grammar::id_function] = "function"; rule_names[parser_grammar::id_tern_expr] = "tern_expr"; rule_names[parser_grammar::id_string] = "string"; tree_to_xml(strm, info.trees, "", rule_names); } else { throw parse_error("parsing failed"); } } template void basic_expressor::clear() { clear_variables(); clear_functions(); } template void basic_expressor::clear_variables() { symbols, charT > variables; grammar_->variables = variables; } template void basic_expressor::clear_functions() { symbols (std::vector >)>, charT > functions; grammar_->functions = functions; } template void 
basic_expressor::add_variable(basic_fray const &name, basic_datum const &value) { grammar_->add_variable(name, value); } template void basic_expressor::add_function(basic_fray const &name, func_t value) { grammar_->add_function(name, value); } namespace { /* $NetBSD: fnmatch.c,v 1.21 2005/12/24 21:11:16 perry Exp $ */ /* * Copyright (c) 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Guido van Rossum. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. * Compares a filename or pathname to a pattern. */ #include #include #define EOS '\0' template const charT *rangematch(const charT *, int); template int match(charT const *pattern, charT const *string) { const charT *stringstart; charT c, test; for (stringstart = string;;) switch (c = *pattern++) { case EOS: return (*string == EOS ? 1 : 0); case '?': if (*string == EOS) return (0); ++string; break; case '*': c = *pattern; /* Collapse multiple stars. */ while (c == '*') c = *++pattern; /* Optimize for pattern with * at end or before /. */ if (c == EOS) { return (1); } /* General case, use recursion. */ while ((test = *string) != EOS) { if (match(pattern, string)) return (1); ++string; } return (0); case '[': if (*string == EOS) return (0); if ((pattern = rangematch(pattern, *string)) == NULL) return (0); ++string; break; case '\\': if ((c = *pattern++) == EOS) { c = '\\'; --pattern; } /* FALLTHROUGH */ default: if (c != *string++) return (0); break; } /* NOTREACHED */ } template const charT * rangematch(charT const *pattern, int test) { int negate, ok; charT c, c2; /* * A bracket expression starting with an unquoted circumflex * character produces unspecified results (IEEE 1003.2-1992, * 3.13.2). This implementation treats it like '!', for * consistency with the regular expression syntax. * J.T. Conklin (conklin@ngai.kaleida.com) */ if ((negate = (*pattern == '!' || *pattern == '^')) != 0) ++pattern; for (ok = 0; (c = *pattern++) != ']';) { if (c == '\\') c = *pattern++; if (c == EOS) return (NULL); if (*pattern == '-' && (c2 = (*(pattern+1))) != EOS && c2 != ']') { pattern += 2; if (c2 == '\\') c2 = *pattern++; if (c2 == EOS) return (NULL); if (c <= test && test <= c2) ok = 1; } else if (c == test) ok = 1; } return (ok == negate ? NULL : pattern); } } // anonymous namespace } // namespace afp #endif /* !EXPRESSOR_H */