/* * Copyright (c) 2008 Andrew Garrett. * Copyright (c) 2008 River Tarnell * Derived from public domain code contributed by Victor Vasiliev. * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely. This software is provided 'as-is', without any express or * implied warranty. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "parser.h" #include "ast.h" /* * ABUSEFILTER EXPRESSION PARSER * ============================= * * This is the basic expression parser. It doesn't contain any AF logic * itself, but rather presents an interface for the user to add custom * functions and variables. * * The interface to the parser is the 'expressor' class. Use it like this: * * expressor e; * e.add_variable("ONE", 1); * e.add_function("f", myfunc); * e.evaluate("ONE + 2"); -- returns 3 * * Custom functions should have the following prototype: * * afp::u32datum (std::vector const &args); * * Functions must return a value; they cannot be void. The arguments passed to * the function are stored in the 'args' array in left-to-right order. * * The parser implements a C-like grammar with some differences. The following * operators are available: * * a & b true if a and b are both true * a | b true if either a or b is true * a ^ b true if either a or b is true, but not if both are true * a + b arithmetic * a - b * a * b * a / b * a % b * a ** b power-of (a^b) * a in b true if the string "b" contains the substring "a" * a contains b true if b contains the string a * a like b true if a matches the Unix glob b * a matches b '' '' * a rlike b true if a matches the Perl regex b * a regex b '' '' * !a true if a is false * (a) same value as a * a ? b : c if a is true, returns the value of b, otherwise c * a == b comparison operators * a != b * a < b * a <= b * a > b * a >= b * a === b returns true if a==b and both are the same type * a !== b return true if a != b or they are different types * * The parser uses afp::datum for its variables. This means it supports * strings, ints and floats, with automatic conversion between types. * * String constants are C-style. The standard C escapes \a \b \f \t \r \n \v are * supported. \xHH encodes a 1-byte Unicode character, \uHHHH encodes a 2-byte * Unicode characters, and \UHHHHHHHH encodes a 4-byte Unicode character. * * Numeric constants can be integers (e.g. 1), or floating pointers (e.g. * 1., .1, 1.2). * * Function calls are f(arg1, arg2, ...). */ namespace afp { using namespace boost::spirit; /* * The grammar itself. */ struct parser_grammar : public grammar { parser_state &state_; void add_variable(u32fray const &name, u32datum const &value) { state_.variables.add(name.c_str(), value); } void add_function( u32fray name, boost::function)> func) { state_.functions.add(name.c_str(), func); } symbols eq_opers, ord_opers, plus_opers, mult_opers, in_opers, bool_opers; parser_grammar(parser_state &state) : state_(state) { eq_opers.add("=", 0); eq_opers.add("==", 0); eq_opers.add("===", 0); eq_opers.add("!=", 0); eq_opers.add("!==", 0); eq_opers.add("/=", 0); ord_opers.add("<", 0); ord_opers.add("<=", 0); ord_opers.add(">", 0); ord_opers.add(">=", 0); plus_opers.add("+", 0); plus_opers.add("-", 0); mult_opers.add("*", 0); mult_opers.add("/", 0); mult_opers.add("%", 0); bool_opers.add("&", 0); bool_opers.add("|", 0); bool_opers.add("^", 0); in_opers.add("in", 0); in_opers.add("contains", 0); in_opers.add("matches", 0); in_opers.add("like", 0); in_opers.add("rlike", 0); in_opers.add("regex", 0); state_.time_units.add("seconds", tval_seconds); state_.time_units.add("minutes", tval_minutes); state_.time_units.add("hours", tval_hours); state_.time_units.add("days", tval_days); state_.time_units.add("weeks", tval_weeks); state_.time_units.add("years", tval_years); state_.time_units.add("second", tval_seconds); state_.time_units.add("minute", tval_minutes); state_.time_units.add("hour", tval_hours); state_.time_units.add("day", tval_days); state_.time_units.add("week", tval_weeks); state_.time_units.add("year", tval_years); } template struct definition { parser_grammar const &self_; definition(parser_grammar const &self) : self_(self) { /* * A literal value. Either a string, a floating * pointer number or an integer. */ value = reduced_node_d[ lexeme_d[ (+chset<>("0-9") >> '.' >> +chset<>("0-9")) | ( '.' >> +chset<>("0-9")) | (+chset<>("0-9") >> '.' ) ] ] | as_lower_d[ leaf_node_d[ +chset<>("0-7") >> 'o' | +chset<>("0-9a-f") >> 'x' | +chset<>("0-1") >> 'b' | +chset<>("0-9") ] ] | date | string ; hexchar = chset<>("a-fA-F0-9") ; octchar = chset<>("0-7") ; c_string_char = "\\x" >> hexchar >> hexchar | "\\u" >> repeat_p(4)[hexchar] | "\\U" >> repeat_p(8)[hexchar] | "\\o" >> octchar >> octchar >> octchar | "\\" >> anychar_p - (ch_p('x') | 'u' | 'o') | anychar_p - (ch_p('"') | '\\') ; string = inner_node_d[ '"' >> leaf_node_d[ *(c_string_char) ] >> '"' ] ; date = inner_node_d[ '"' >> leaf_node_d[ *(anychar_p - '"') ] >> "\"d" ] ; /* * A variable. If the variable is found in the * user-supplied variable list, we use that. * Otherwise, unknown variables (containing uppercase * letters and underscore only) are returned as the * empty string. */ variable = reduced_node_d[ +(upper_p | '_') ] ; /* * A function call: func([arg[, arg...]]). */ function = ( root_node_d[ reduced_node_d[ +(lower_p | '_') ] ] >> inner_node_d[ '(' >> ( tern_expr % discard_node_d[ch_p(',')] ) >> ')' ] ) ; if_then_expr = root_node_d[ str_p("if") ] >> tern_expr >> discard_node_d[ str_p("then") ] >> tern_expr >> discard_node_d[ str_p("else") ] >> tern_expr >> discard_node_d[ str_p("end") ] ; /* * A basic atomic value. Either a variable, function * or literal, or a negated expression !a, or a * parenthesised expression (a). */ basic = value | variable | function | if_then_expr | inner_node_d[ '(' >> tern_expr >> ')' ] | root_node_d[ch_p('!')] >> tern_expr | root_node_d[ch_p('+')] >> tern_expr | root_node_d[ch_p('-')] >> tern_expr ; time_unit = basic >> !( root_node_d[ self.state_.time_units ] ) ; /* * "a in b" operator */ in_expr = time_unit >> *( root_node_d[ self.in_opers ] >> time_unit ) ; /* * power-of. This is right-associative. */ pow_expr = in_expr >> !( root_node_d[ str_p("**") ] >> pow_expr ) ; /* * Multiplication and operators with the same * precedence. */ mult_expr = pow_expr >> *( root_node_d[ self.mult_opers ] >> pow_expr ) ; /* * Additional and operators with the same precedence. */ plus_expr = mult_expr >> *( root_node_d[ self.plus_opers ] >> mult_expr ) ; /* * Ordinal comparisons and operators with the same * precedence. */ ord_expr = plus_expr >> *( root_node_d[ self.ord_opers ] >> plus_expr ) ; /* * Equality comparisons. */ eq_expr = ord_expr >> *( root_node_d[ self.eq_opers ] >> ord_expr ) ; /* * Boolean expressions. */ bool_expr = eq_expr >> *( root_node_d[ self.bool_opers ] >> eq_expr ) ; comma_expr = bool_expr >> *( root_node_d[ str_p(",") ] >> bool_expr ) ; /* * The ternary operator. Notice this is * right-associative: a ? b ? c : d : e * is supported. */ tern_expr = comma_expr >> !( root_node_d[ch_p('?')] >> tern_expr >> discard_node_d[ch_p(':')] >> tern_expr ) ; } rule, parser_tag > const &start() const { return tern_expr; } rule c_string_char, hexchar, octchar; rule, parser_tag > value; rule, parser_tag > variable; rule, parser_tag > basic; rule, parser_tag > bool_expr; rule, parser_tag > ord_expr; rule, parser_tag > eq_expr; rule, parser_tag > pow_expr; rule, parser_tag > mult_expr; rule, parser_tag > plus_expr; rule, parser_tag > in_expr; rule, parser_tag > date; rule, parser_tag > time_unit; rule, parser_tag > comma_expr; rule, parser_tag > if_then_expr; rule, parser_tag > function; rule, parser_tag > tern_expr; rule, parser_tag > string; }; }; expressor::expressor() { grammar_ = new parser_grammar(state_); /* * We provide a couple of standard variables everyone wants. */ add_variable(make_astring("true"), afp::u32datum::from_int(true)); add_variable(make_astring("false"), afp::u32datum::from_int(false)); /* * The cast functions. */ add_function(make_astring("int"), &f_int); add_function(make_astring("string"), &f_string); add_function(make_astring("float"), &f_float); } expressor::~expressor() { delete grammar_; } /* * The user interface to evaluate an expression. It returns the result, or * throws an exception if an error occurs. */ u32datum expressor::evaluate(u32fray const &filter) const { using namespace boost::spirit; typedef u32fray::const_iterator iterator_t; u32datum ret; tree_parse_info info = ast_parse(filter.begin(), filter.end(), *grammar_ >> end_p, chset<>("\r\n\t ") | comment_p("/*", "*/")); if (info.full) { ast_evaluator ae(state_); return ae.tree_eval(info.trees.begin()); } else { throw parse_error("parsing failed"); } } void expressor::print_xml(std::ostream &strm, u32fray const &filter) const { using namespace boost::spirit; typedef u32fray::const_iterator iterator_t; tree_parse_info info = ast_parse(filter.begin(), filter.end(), *grammar_ >> end_p, +chset<>("\n\t ") | comment_p("/*", "*/")); if (info.full) { std::map rule_names; rule_names[pid_value] = "value"; rule_names[pid_variable] = "variable"; rule_names[pid_basic] = "basic"; rule_names[pid_bool_expr] = "bool_expr"; rule_names[pid_ord_expr] = "ord_expr"; rule_names[pid_eq_expr] = "eq_expr"; rule_names[pid_pow_expr] = "pow_expr"; rule_names[pid_mult_expr] = "mult_expr"; rule_names[pid_plus_expr] = "plus_expr"; rule_names[pid_in_expr] = "in_expr"; rule_names[pid_function] = "function"; rule_names[pid_tern_expr] = "tern_expr"; rule_names[pid_string] = "string"; rule_names[pid_date] = "date"; rule_names[pid_time_unit] = "time_unit"; rule_names[pid_comma_expr] = "comma_expr"; rule_names[pid_ifthen] = "if_then_expr"; tree_to_xml(strm, info.trees, "", rule_names); } else { throw parse_error("parsing failed"); } } void expressor::clear() { clear_variables(); clear_functions(); } void expressor::clear_variables() { symbols variables; state_.variables = variables; } void expressor::clear_functions() { symbols)>, UChar32> functions; state_.functions = functions; } void expressor::add_variable(u32fray const &name, u32datum const &value) { grammar_->add_variable(name, value); } void expressor::add_function(u32fray const &name, func_t value) { grammar_->add_function(name, value); } } // namespace afp