mediawiki-extensions-AbuseF.../parser_native/parser.cpp

/*
 * Copyright (c) 2008 Andrew Garrett.
 * Copyright (c) 2008 River Tarnell <river@wikimedia.org>
 * Derived from public domain code contributed by Victor Vasiliev.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely. This software is provided 'as-is', without any express or
 * implied warranty.
 */

#include	<boost/spirit/core.hpp>
#include	<boost/spirit/utility/confix.hpp>
#include	<boost/spirit/utility/chset.hpp>
#include	<boost/spirit/utility/loops.hpp>
#include	<boost/spirit/tree/ast.hpp>
#include	<boost/spirit/tree/tree_to_xml.hpp>
#include	<boost/spirit/symbols.hpp>
#include	<boost/spirit/utility/escape_char.hpp>
#include	<boost/function.hpp>
#include	<boost/noncopyable.hpp>
#include	<boost/format.hpp>
#include	<boost/regex/icu.hpp>

#include	"parser.h"
#include	"ast.h"

/*
 *                    ABUSEFILTER EXPRESSION PARSER
 *                    =============================
 *
 * This is the basic expression parser.  It doesn't contain any AF logic
 * itself, but rather presents an interface for the user to add custom
 * functions and variables.
 *
 * The interface to the parser is the 'expressor' class.  Use it like this:
 *
 *   expressor e;
 *   e.add_variable("ONE", 1);
 *   e.add_function("f", myfunc);
 *   e.evaluate("ONE + 2");     -- returns 3 
 * 
 * Custom functions should have the following prototype:
 *
 *   afp::u32datum (std::vector<afp::u32datum> const &args);
 *
 * Functions must return a value; they cannot be void.  The arguments passed to
 * the function are stored in the 'args' array in left-to-right order.
 *
 * The parser implements a C-like grammar with some differences.  The following
 * operators are available:
 *
 *   a & b		true if a and b are both true
 *   a | b		true if either a or b is true
 *   a ^ b		true if either a or b is true, but not if both are true
 *   a + b		arithmetic
 *   a - b
 *   a * b
 *   a / b
 *   a % b
 *   a ** b		power-of (a^b)
 *   a in b		true if the string "b" contains the substring "a"
 *   a contains b	true if b contains the string a
 *   a like b		true if a matches the Unix glob b
 *   a matches b	'' ''
 *   a rlike b		true if a matches the Perl regex b
 *   a regex b		'' ''
 *   !a			true if a is false
 *   (a)		same value as a
 *   a ? b : c		if a is true, returns the value of b, otherwise c
 *   a == b		comparison operators
 *   a != b
 *   a < b
 *   a <= b
 *   a > b
 *   a >= b
 *   a === b		returns true if a==b and both are the same type
 *   a !== b		return true if a != b or they are different types
 *
 * The parser uses afp::datum for its variables.  This means it supports
 * strings, ints and floats, with automatic conversion between types.
 *
 * String constants are C-style.  The standard C escapes \a \b \f \t \r \n \v are
 * supported.  \xHH encodes a 1-byte Unicode character, \uHHHH encodes a 2-byte
 * Unicode character, and \UHHHHHHHH encodes a 4-byte Unicode character.
 *
 * Numeric constants can be integers (e.g. 1), or floating-point numbers (e.g.
 * 1., .1, 1.2).
 *
 * Function calls are f(arg1, arg2, ...).  
 */

namespace afp {

using namespace boost::spirit;

/*
 * The grammar itself.
 *
 * parser_grammar is a Boost.Spirit (classic) grammar describing the filter
 * expression language.  It holds a reference to a parser_state supplied by
 * the caller: variables and functions are registered into that state through
 * add_variable() / add_function(), and the constructor registers the
 * time-unit keywords into it.
 *
 * The expression rules are layered, tightest binding first:
 *
 *   basic, time_unit, in_expr, pow_expr, mult_expr, plus_expr,
 *   ord_expr, eq_expr, bool_expr, comma_expr, tern_expr
 *
 * tern_expr is the start rule.
 */
struct parser_grammar : public grammar<parser_grammar>
{
	/* Shared parser state (variable, function and time-unit tables). */
	parser_state &state_;

	/*
	 * Register (or re-register) a variable under 'name' in the shared
	 * state's variable table.
	 */
	void add_variable(u32fray const &name, u32datum const &value) {
		state_.variables.add(name.c_str(), value);
	}

	/*
	 * Register a callable under 'name' in the shared state's function
	 * table.  Both arguments are taken by const reference, matching
	 * add_variable(); the symbol table stores its own copy.
	 */
	void add_function(
			u32fray const &name,
			boost::function<u32datum (std::vector<u32datum>)> const &func) {
		state_.functions.add(name.c_str(), func);
	}

	/*
	 * Operator tables, one per precedence level.  Every entry is
	 * registered with the value 0; the rules below use the tables to
	 * match operator tokens.
	 */
	symbols<int, UChar32> eq_opers, ord_opers, plus_opers, mult_opers, in_opers, bool_opers;

	/*
	 * Fill the operator tables and register the time-unit keywords
	 * (singular and plural spellings map to the same tval_* constant)
	 * into state.time_units.
	 */
	parser_grammar(parser_state &state) : state_(state) {
		/* Equality operators. */
		eq_opers.add("=", 0);
		eq_opers.add("==", 0);
		eq_opers.add("===", 0);
		eq_opers.add("!=", 0);
		eq_opers.add("!==", 0);
		eq_opers.add("/=", 0);

		/* Ordering operators. */
		ord_opers.add("<", 0);
		ord_opers.add("<=", 0);
		ord_opers.add(">", 0);
		ord_opers.add(">=", 0);

		/* Additive operators. */
		plus_opers.add("+", 0);
		plus_opers.add("-", 0);

		/* Multiplicative operators. */
		mult_opers.add("*", 0);
		mult_opers.add("/", 0);
		mult_opers.add("%", 0);

		/* Boolean operators. */
		bool_opers.add("&", 0);
		bool_opers.add("|", 0);
		bool_opers.add("^", 0);

		/* Containment / pattern-matching keyword operators. */
		in_opers.add("in", 0);
		in_opers.add("contains", 0);
		in_opers.add("matches", 0);
		in_opers.add("like", 0);
		in_opers.add("rlike", 0);
		in_opers.add("regex", 0);

		/* Time units, plural forms. */
		state_.time_units.add("seconds", tval_seconds);
		state_.time_units.add("minutes", tval_minutes);
		state_.time_units.add("hours", tval_hours);
		state_.time_units.add("days", tval_days);
		state_.time_units.add("weeks", tval_weeks);
		state_.time_units.add("years", tval_years);

		/* Time units, singular forms. */
		state_.time_units.add("second", tval_seconds);
		state_.time_units.add("minute", tval_minutes);
		state_.time_units.add("hour", tval_hours);
		state_.time_units.add("day", tval_days);
		state_.time_units.add("week", tval_weeks);
		state_.time_units.add("year", tval_years);
	}

	/*
	 * The rule definitions, instantiated by Spirit for a given scanner
	 * type.  The *_node_d directives control the shape of the AST that
	 * ast_parse() builds from these rules.
	 */
	template<typename ScannerT>
	struct definition
	{
		parser_grammar const &self_;

		definition(parser_grammar const &self)
		: self_(self)
		{
			/*
			 * A literal value.  Either a floating point number
			 * (1.2, .1 or 1.), an integer, a date or a string.
			 *
			 * Integers are a run of digits optionally followed
			 * by an 'o', 'x' or 'b' suffix (presumably octal,
			 * hexadecimal and binary; the conversion itself is
			 * not done here).  as_lower_d lets the suffix and
			 * the hex digits be written in either case.
			 *
			 * 'date' is tried before 'string' because both start
			 * with a double quote.
			 */
			value =
				  reduced_node_d[ lexeme_d[
				  	  (+chset<>("0-9") >> '.' >> +chset<>("0-9"))
					| (                   '.' >> +chset<>("0-9"))
					| (+chset<>("0-9") >> '.'                   )
				  ] ]
				| as_lower_d[ leaf_node_d[
					  +chset<>("0-7") >> 'o'
					| +chset<>("0-9a-f") >> 'x'
					| +chset<>("0-1") >> 'b'
					| +chset<>("0-9")
				] ]
				| date
				| string
				;

			/* A single hexadecimal digit. */
			hexchar = chset<>("a-fA-F0-9")
				;

			/* A single octal digit. */
			octchar = chset<>("0-7")
				;

			/*
			 * One character inside a string literal: a \xHH,
			 * \uHHHH, \UHHHHHHHH or \oOOO escape, a backslash
			 * followed by any other character, or any character
			 * other than '"' and '\'.
			 *
			 * NOTE(review): the generic-escape alternative
			 * excludes 'x', 'u' and 'o' but not 'U', so a
			 * malformed \U sequence still matches as a generic
			 * escape.  Confirm whether that is intended.
			 */
			c_string_char =
				  "\\x" >> hexchar >> hexchar
				| "\\u" >> repeat_p(4)[hexchar]
				| "\\U" >> repeat_p(8)[hexchar]
				| "\\o" >> octchar >> octchar >> octchar
				| "\\" >> anychar_p - (ch_p('x') | 'u' | 'o')
				| anychar_p - (ch_p('"') | '\\')
				;

			/*
			 * A double-quoted string.  The quotes are dropped
			 * from the tree and the contents become one leaf.
			 */
			string = inner_node_d[
					   '"'
					>> leaf_node_d[ *(c_string_char) ]
					>> '"'
				]
				;

			/*
			 * A date literal: a double-quoted run of non-quote
			 * characters immediately followed by 'd', e.g.
			 * "..."d.  No escapes are recognised inside it.
			 */
			date =
				inner_node_d[
					   '"'
					>> leaf_node_d[ *(anychar_p - '"') ]
					>> "\"d"
				]
				;

			/*
			 * A variable.  If the variable is found in the
			 * user-supplied variable list, we use that.
			 * Otherwise, unknown variables (containing uppercase
			 * letters and underscore only) are returned as the
			 * empty string.
			 *
			 * The grammar itself only captures the name, which
			 * must consist of upper-case letters and '_'.
			 */
			variable = reduced_node_d[ +(upper_p | '_') ]
				;

			/*
			 * A function call: func(arg[, arg...]).  The name is
			 * made of lower-case letters and '_'.  The list
			 * operator (%) requires at least one argument.
			 *
			 * NOTE(review): tern_expr also accepts ',' through
			 * comma_expr, so check how several arguments are
			 * actually laid out in the resulting tree.
			 */
			function =
				  (
					   root_node_d[ reduced_node_d[
					   	+(lower_p | '_')
					   ] ]
					>> inner_node_d[
						   '('
						>> ( tern_expr % discard_node_d[ch_p(',')] )
						>> ')'
					   ]
				  )
				;

			/*
			 * if <cond> then <iftrue> else <iffalse> end.  The
			 * 'if' keyword becomes the root; the other keywords
			 * are dropped from the tree.
			 */
			if_then_expr =
				   root_node_d[ str_p("if") ]
				>> tern_expr
				>> discard_node_d[ str_p("then") ]
				>> tern_expr
				>> discard_node_d[ str_p("else") ]
				>> tern_expr
				>> discard_node_d[ str_p("end") ]
				;

			/*
			 * A basic atomic value.  Either a literal, variable,
			 * function call or if/then/else expression, a
			 * parenthesised expression (a), or a unary !a, +a
			 * or -a.  The operand of a unary operator is a full
			 * tern_expr.
			 */
			basic =
				  value
				| variable
				| function
				| if_then_expr
				| inner_node_d[ '(' >> tern_expr >> ')' ]
				| root_node_d[ch_p('!')] >> tern_expr
				| root_node_d[ch_p('+')] >> tern_expr
				| root_node_d[ch_p('-')] >> tern_expr
				;

			/*
			 * A basic value optionally followed by one of the
			 * time-unit keywords registered in the parser state.
			 */
			time_unit =
				   basic
				>> !( root_node_d[ self.state_.time_units ] )
				;

			/*
			 * "a in b" operator, and the other keyword operators
			 * from in_opers.
			 */
			in_expr =
				  time_unit
				>> *( root_node_d[ self.in_opers ] >> time_unit )
				;

			/*
			 * power-of.  This is right-associative.
			 */
			pow_expr =
				   in_expr
				>> !( root_node_d[ str_p("**") ] >> pow_expr )
				;

			/*
			 * Multiplication and operators with the same
			 * precedence.
			 */
			mult_expr =
				   pow_expr
				>> *( root_node_d[ self.mult_opers ] >> pow_expr )
				;

			/*
			 * Addition and operators with the same precedence.
			 */
			plus_expr =
				   mult_expr
				>> *( root_node_d[ self.plus_opers ] >> mult_expr )
				;

			/*
			 * Ordinal comparisons and operators with the same
			 * precedence.
			 */
			ord_expr  =
				   plus_expr
				>> *( root_node_d[ self.ord_opers ] >> plus_expr )
				;

			/*
			 * Equality comparisons.
			 */
			eq_expr =
				   ord_expr
				>> *( root_node_d[ self.eq_opers ] >> ord_expr )
				;

			/*
			 * Boolean expressions.
			 */
			bool_expr =
				  eq_expr
				>> *( root_node_d[ self.bool_opers ] >> eq_expr )
				;

			/*
			 * Comma-separated sequence of boolean expressions;
			 * each ',' becomes a root node.
			 */
			comma_expr =
				   bool_expr
				>> *( root_node_d[ str_p(",") ] >> bool_expr )
				;

			/*
			 * The ternary operator.  Notice this is
			 * right-associative: a ? b ? c : d : e
			 * is supported.
			 */
			tern_expr =
				   comma_expr
				>> !(
					   root_node_d[ch_p('?')] >> tern_expr
					>> discard_node_d[ch_p(':')] >> tern_expr
				   )
				;


		}

		/* The start rule: a complete (ternary) expression. */
		rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> >
		const &start() const {
			return tern_expr;
		}

		/* Untagged helper rules. */
		rule<ScannerT> c_string_char, hexchar, octchar;

		/* Rules tagged with a parser id. */
		rule<ScannerT, parser_context<>, parser_tag<pid_value> > value;
		rule<ScannerT, parser_context<>, parser_tag<pid_variable> > variable;
		rule<ScannerT, parser_context<>, parser_tag<pid_basic> > basic;
		rule<ScannerT, parser_context<>, parser_tag<pid_bool_expr> > bool_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_ord_expr> > ord_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_eq_expr> > eq_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_pow_expr> > pow_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_mult_expr> > mult_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_plus_expr> > plus_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_in_expr> > in_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_date> > date;
		rule<ScannerT, parser_context<>, parser_tag<pid_time_unit> > time_unit;
		rule<ScannerT, parser_context<>, parser_tag<pid_comma_expr> > comma_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_ifthen> > if_then_expr;

		rule<ScannerT, parser_context<>, parser_tag<pid_function> > function;
		rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> > tern_expr;
		rule<ScannerT, parser_context<>, parser_tag<pid_string> > string;
	};
};

/*
 * Create the grammar bound to this expressor's parser state and register
 * the built-in variables and cast functions.
 */
expressor::expressor()
{
	grammar_ = new parser_grammar(state_);

	/*
	 * If any registration below throws, the destructor will not run for
	 * this partially-constructed object, so release the grammar here.
	 */
	try {
		/*
		 * We provide a couple of standard variables everyone wants.
		 *
		 * NOTE(review): the grammar's 'variable' rule only matches
		 * upper-case letters and '_'; verify that these lower-case
		 * names are reachable from filter text.
		 */
		add_variable(make_astring<UChar32>("true"), afp::u32datum::from_int(true));
		add_variable(make_astring<UChar32>("false"), afp::u32datum::from_int(false));

		/*
		 * The cast functions.
		 */
		add_function(make_astring<UChar32>("int"), &f_int<UChar32>);
		add_function(make_astring<UChar32>("string"), &f_string<UChar32>);
		add_function(make_astring<UChar32>("float"), &f_float<UChar32>);
	} catch (...) {
		delete grammar_;
		throw;
	}
}

/*
 * Release the grammar object allocated by the constructor.
 */
expressor::~expressor()
{
	delete this->grammar_;
}

/*
 * The user interface to evaluate an expression.  It returns the result, or
 * throws an exception if an error occurs.
 *
 * The filter is parsed into an AST with the grammar, skipping whitespace
 * (CR, LF, tab, space) and C-style block comments between tokens.  The
 * whole input must be consumed; otherwise parse_error is thrown.  On
 * success the tree is handed to an ast_evaluator built on the parser state.
 */
u32datum
expressor::evaluate(u32fray const &filter) const
{
	using namespace boost::spirit;

	typedef u32fray::const_iterator iterator_t;

	tree_parse_info<iterator_t> info = ast_parse(filter.begin(), filter.end(), *grammar_ >> end_p,
			chset<>("\r\n\t ") | comment_p("/*", "*/"));

	/* Reject partial matches as well as outright failures. */
	if (!info.full)
		throw parse_error("parsing failed");

	ast_evaluator ae(state_);
	return ae.tree_eval(info.trees.begin());
}

/*
 * Parse 'filter' and write its AST to 'strm' as XML, labelling each node
 * with the name of the grammar rule that produced it.  Throws parse_error
 * if the filter cannot be parsed in full.
 *
 * The skip parser accepts the same whitespace set as evaluate() (including
 * carriage return), so any filter that evaluates can also be dumped.
 */
void
expressor::print_xml(std::ostream &strm, u32fray const &filter) const
{
	using namespace boost::spirit;

	typedef u32fray::const_iterator iterator_t;

	tree_parse_info<iterator_t> info = ast_parse(filter.begin(), filter.end(), *grammar_ >> end_p,
			+chset<>("\r\n\t ") | comment_p("/*", "*/"));

	if (!info.full)
		throw parse_error("parsing failed");

	/* Map each rule's parser id to a readable element name. */
	std::map<parser_id, std::string> rule_names;
	rule_names[pid_value] = "value";
	rule_names[pid_variable] = "variable";
	rule_names[pid_basic] = "basic";
	rule_names[pid_bool_expr] = "bool_expr";
	rule_names[pid_ord_expr] = "ord_expr";
	rule_names[pid_eq_expr] = "eq_expr";
	rule_names[pid_pow_expr] = "pow_expr";
	rule_names[pid_mult_expr] = "mult_expr";
	rule_names[pid_plus_expr] = "plus_expr";
	rule_names[pid_in_expr] = "in_expr";
	rule_names[pid_function] = "function";
	rule_names[pid_tern_expr] = "tern_expr";
	rule_names[pid_string] = "string";
	rule_names[pid_date] = "date";
	rule_names[pid_time_unit] = "time_unit";
	rule_names[pid_comma_expr] = "comma_expr";
	rule_names[pid_ifthen] = "if_then_expr";

	tree_to_xml(strm, info.trees, "", rule_names);
}

void
expressor::clear()
{
	clear_variables();
	clear_functions();
}

void
expressor::clear_variables()
{
	symbols<u32datum, UChar32> variables;
	state_.variables = variables;
}

void
expressor::clear_functions()
{
	symbols<boost::function<u32datum (std::vector<u32datum>)>, UChar32> functions;
	state_.functions = functions;
}

/*
 * Register a variable with the parser; forwards to the grammar, which
 * stores it in the shared parser state.
 */
void
expressor::add_variable(u32fray const &name, u32datum const &value)
{
	(*grammar_).add_variable(name, value);
}

/*
 * Register a callable function with the parser; forwards to the grammar,
 * which stores it in the shared parser state.
 */
void
expressor::add_function(u32fray const &name, func_t value)
{
	(*grammar_).add_function(name, value);
}

} // namespace afp
- untemplatise parser and ast, force Unicode (UChar32) only - set an svn:ignore 2008-08-12 14:02:33 +00:00			`/*`
			`* Copyright (c) 2008 Andrew Garrett.`
			`* Copyright (c) 2008 River Tarnell <river@wikimedia.org>`
			`* Derived from public domain code contributed by Victor Vasiliev.`
			`*`
			`* Permission is granted to anyone to use this software for any purpose,`
			`* including commercial applications, and to alter it and redistribute it`
			`* freely. This software is provided 'as-is', without any express or`
			`* implied warranty.`
			`*/`

			`#include <boost/spirit/core.hpp>`
			`#include <boost/spirit/utility/confix.hpp>`
			`#include <boost/spirit/utility/chset.hpp>`
			`#include <boost/spirit/utility/loops.hpp>`
			`#include <boost/spirit/tree/ast.hpp>`
			`#include <boost/spirit/tree/tree_to_xml.hpp>`
			`#include <boost/spirit/symbols.hpp>`
			`#include <boost/spirit/utility/escape_char.hpp>`
			`#include <boost/function.hpp>`
			`#include <boost/noncopyable.hpp>`
			`#include <boost/format.hpp>`
			`#include <boost/regex/icu.hpp>`

			`#include "parser.h"`
			`#include "ast.h"`

			`/*`
			`* ABUSEFILTER EXPRESSION PARSER`
			`* =============================`
			`*`
			`* This is the basic expression parser. It doesn't contain any AF logic`
			`* itself, but rather presents an interface for the user to add custom`
			`* functions and variables.`
			`*`
			`* The interface to the parser is the 'expressor' class. Use it like this:`
			`*`
			`* expressor e;`
			`* e.add_variable("ONE", 1);`
			`* e.add_function("f", myfunc);`
			`* e.evaluate("ONE + 2"); -- returns 3`
			`*`
			`* Custom functions should have the following prototype:`
			`*`
			`* afp::u32datum (std::vector<afp::u32datum> const &args);`
			`*`
			`* Functions must return a value; they cannot be void. The arguments passed to`
			`* the function are stored in the 'args' array in left-to-right order.`
			`*`
			`* The parser implements a C-like grammar with some differences. The following`
			`* operators are available:`
			`*`
			`* a & b true if a and b are both true`
			`* a \| b true if either a or b is true`
			`* a ^ b true if either a or b is true, but not if both are true`
			`* a + b arithmetic`
			`* a - b`
			`* a * b`
			`* a / b`
			`* a % b`
			`* a ** b power-of (a^b)`
			`* a in b true if the string "b" contains the substring "a"`
			`* a contains b true if b contains the string a`
			`* a like b true if a matches the Unix glob b`
			`* a matches b '' ''`
			`* a rlike b true if a matches the Perl regex b`
			`* a regex b '' ''`
			`* !a true if a is false`
			`* (a) same value as a`
			`* a ? b : c if a is true, returns the value of b, otherwise c`
			`* a == b comparison operators`
			`* a != b`
			`* a < b`
			`* a <= b`
			`* a > b`
			`* a >= b`
			`* a === b returns true if a==b and both are the same type`
			`* a !== b return true if a != b or they are different types`
			`*`
			`* The parser uses afp::datum for its variables. This means it supports`
			`* strings, ints and floats, with automatic conversion between types.`
			`*`
			`* String constants are C-style. The standard C escapes \a \b \f \t \r \n \v are`
			`* supported. \xHH encodes a 1-byte Unicode character, \uHHHH encodes a 2-byte`
			`* Unicode characters, and \UHHHHHHHH encodes a 4-byte Unicode character.`
			`*`
			`* Numeric constants can be integers (e.g. 1), or floating pointers (e.g.`
			`* 1., .1, 1.2).`
			`*`
			`* Function calls are f(arg1, arg2, ...).`
			`*/`

			`namespace afp {`

			`using namespace boost::spirit;`

			`/*`
			`* The grammar itself.`
			`*/`
			`struct parser_grammar : public grammar<parser_grammar>`
			`{`
			`parser_state &state_;`

			`void add_variable(u32fray const &name, u32datum const &value) {`
			`state_.variables.add(name.c_str(), value);`
			`}`

			`void add_function(`
			`u32fray name,`
			`boost::function<u32datum (std::vector<u32datum>)> func) {`
			`state_.functions.add(name.c_str(), func);`
			`}`

			`symbols<int, UChar32> eq_opers, ord_opers, plus_opers, mult_opers, in_opers, bool_opers;`

			`parser_grammar(parser_state &state) : state_(state) {`
			`eq_opers.add("=", 0);`
			`eq_opers.add("==", 0);`
			`eq_opers.add("===", 0);`
			`eq_opers.add("!=", 0);`
			`eq_opers.add("!==", 0);`
			`eq_opers.add("/=", 0);`
			`ord_opers.add("<", 0);`
			`ord_opers.add("<=", 0);`
			`ord_opers.add(">", 0);`
			`ord_opers.add(">=", 0);`
			`plus_opers.add("+", 0);`
			`plus_opers.add("-", 0);`
			`mult_opers.add("*", 0);`
			`mult_opers.add("/", 0);`
			`mult_opers.add("%", 0);`
			`bool_opers.add("&", 0);`
			`bool_opers.add("\|", 0);`
			`bool_opers.add("^", 0);`
			`in_opers.add("in", 0);`
			`in_opers.add("contains", 0);`
			`in_opers.add("matches", 0);`
			`in_opers.add("like", 0);`
			`in_opers.add("rlike", 0);`
			`in_opers.add("regex", 0);`
			`state_.time_units.add("seconds", tval_seconds);`
			`state_.time_units.add("minutes", tval_minutes);`
			`state_.time_units.add("hours", tval_hours);`
			`state_.time_units.add("days", tval_days);`
			`state_.time_units.add("weeks", tval_weeks);`
			`state_.time_units.add("years", tval_years);`
			`state_.time_units.add("second", tval_seconds);`
			`state_.time_units.add("minute", tval_minutes);`
			`state_.time_units.add("hour", tval_hours);`
			`state_.time_units.add("day", tval_days);`
			`state_.time_units.add("week", tval_weeks);`
			`state_.time_units.add("year", tval_years);`
			`}`

			`template<typename ScannerT>`
			`struct definition`
			`{`
			`parser_grammar const &self_;`

			`definition(parser_grammar const &self)`
			`: self_(self)`
			`{`
			`/*`
			`* A literal value. Either a string, a floating`
			`* pointer number or an integer.`
			`*/`
			`value =`
			`reduced_node_d[ lexeme_d[`
			`(+chset<>("0-9") >> '.' >> +chset<>("0-9"))`
			`\| ( '.' >> +chset<>("0-9"))`
			`\| (+chset<>("0-9") >> '.' )`
			`] ]`
			`\| as_lower_d[ leaf_node_d[`
			`+chset<>("0-7") >> 'o'`
			`\| +chset<>("0-9a-f") >> 'x'`
			`\| +chset<>("0-1") >> 'b'`
			`\| +chset<>("0-9")`
			`] ]`
			`\| date`
			`\| string`
			`;`

			`hexchar = chset<>("a-fA-F0-9")`
			`;`

			`octchar = chset<>("0-7")`
			`;`

			`c_string_char =`
			`"\\x" >> hexchar >> hexchar`
			`\| "\\u" >> repeat_p(4)[hexchar]`
			`\| "\\U" >> repeat_p(8)[hexchar]`
			`\| "\\o" >> octchar >> octchar >> octchar`
			`\| "\\" >> anychar_p - (ch_p('x') \| 'u' \| 'o')`
			`\| anychar_p - (ch_p('"') \| '\\')`
			`;`

			`string = inner_node_d[`
			`'"'`
			`>> leaf_node_d[ *(c_string_char) ]`
			`>> '"'`
			`]`
			`;`

			`date =`
			`inner_node_d[`
			`'"'`
			`>> leaf_node_d[ *(anychar_p - '"') ]`
			`>> "\"d"`
			`]`
			`;`

			`/*`
			`* A variable. If the variable is found in the`
			`* user-supplied variable list, we use that.`
			`* Otherwise, unknown variables (containing uppercase`
			`* letters and underscore only) are returned as the`
			`* empty string.`
			`*/`
			`variable = reduced_node_d[ +(upper_p \| '_') ]`
			`;`

			`/*`
			`* A function call: func([arg[, arg...]]).`
			`*/`
			`function =`
			`(`
			`root_node_d[ reduced_node_d[`
			`+(lower_p \| '_')`
			`] ]`
			`>> inner_node_d[`
			`'('`
			`>> ( tern_expr % discard_node_d[ch_p(',')] )`
			`>> ')'`
			`]`
			`)`
			`;`

add 'if <expr> then <iftrue> else <iffalse> end' as an alternative syntax to expr ? iftrue : iffalse 2008-08-12 16:40:51 +00:00			`if_then_expr =`
			`root_node_d[ str_p("if") ]`
			`>> tern_expr`
			`>> discard_node_d[ str_p("then") ]`
			`>> tern_expr`
			`>> discard_node_d[ str_p("else") ]`
			`>> tern_expr`
			`>> discard_node_d[ str_p("end") ]`
			`;`
- untemplatise parser and ast, force Unicode (UChar32) only - set an svn:ignore 2008-08-12 14:02:33 +00:00
			`/*`
			`* A basic atomic value. Either a variable, function`
			`* or literal, or a negated expression !a, or a`
			`* parenthesised expression (a).`
			`*/`
			`basic =`
			`value`
			`\| variable`
			`\| function`
add 'if <expr> then <iftrue> else <iffalse> end' as an alternative syntax to expr ? iftrue : iffalse 2008-08-12 16:40:51 +00:00			`\| if_then_expr`
- untemplatise parser and ast, force Unicode (UChar32) only - set an svn:ignore 2008-08-12 14:02:33 +00:00			`\| inner_node_d[ '(' >> tern_expr >> ')' ]`
			`\| root_node_d[ch_p('!')] >> tern_expr`
			`\| root_node_d[ch_p('+')] >> tern_expr`
			`\| root_node_d[ch_p('-')] >> tern_expr`
			`;`

			`time_unit =`
			`basic`
			`>> !( root_node_d[ self.state_.time_units ] )`
			`;`

			`/*`
			`* "a in b" operator`
			`*/`
			`in_expr =`
			`time_unit`
			`>> *( root_node_d[ self.in_opers ] >> time_unit )`
			`;`

			`/*`
			`* power-of. This is right-associative.`
			`*/`
			`pow_expr =`
			`in_expr`
			`>> !( root_node_d[ str_p("**") ] >> pow_expr )`
			`;`

			`/*`
			`* Multiplication and operators with the same`
			`* precedence.`
			`*/`
			`mult_expr =`
			`pow_expr`
			`>> *( root_node_d[ self.mult_opers ] >> pow_expr )`
			`;`

			`/*`
			`* Additional and operators with the same precedence.`
			`*/`
			`plus_expr =`
			`mult_expr`
			`>> *( root_node_d[ self.plus_opers ] >> mult_expr )`
			`;`

			`/*`
			`* Ordinal comparisons and operators with the same`
			`* precedence.`
			`*/`
			`ord_expr =`
			`plus_expr`
			`>> *( root_node_d[ self.ord_opers ] >> plus_expr )`
			`;`

			`/*`
			`* Equality comparisons.`
			`*/`
			`eq_expr =`
			`ord_expr`
			`>> *( root_node_d[ self.eq_opers ] >> ord_expr )`
			`;`

			`/*`
			`* Boolean expressions.`
			`*/`
			`bool_expr =`
			`eq_expr`
			`>> *( root_node_d[ self.bool_opers ] >> eq_expr )`
			`;`

			`comma_expr =`
			`bool_expr`
			`>> *( root_node_d[ str_p(",") ] >> bool_expr )`
			`;`

			`/*`
			`* The ternary operator. Notice this is`
			`* right-associative: a ? b ? c : d : e`
			`* is supported.`
			`*/`
			`tern_expr =`
			`comma_expr`
			`>> !(`
			`root_node_d[ch_p('?')] >> tern_expr`
			`>> discard_node_d[ch_p(':')] >> tern_expr`
			`)`
			`;`
add 'if <expr> then <iftrue> else <iffalse> end' as an alternative syntax to expr ? iftrue : iffalse 2008-08-12 16:40:51 +00:00

- untemplatise parser and ast, force Unicode (UChar32) only - set an svn:ignore 2008-08-12 14:02:33 +00:00			`}`

			`rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> >`
			`const &start() const {`
			`return tern_expr;`
			`}`

			`rule<ScannerT> c_string_char, hexchar, octchar;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_value> > value;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_variable> > variable;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_basic> > basic;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_bool_expr> > bool_expr;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_ord_expr> > ord_expr;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_eq_expr> > eq_expr;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_pow_expr> > pow_expr;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_mult_expr> > mult_expr;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_plus_expr> > plus_expr;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_in_expr> > in_expr;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_date> > date;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_time_unit> > time_unit;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_comma_expr> > comma_expr;`
add 'if <expr> then <iftrue> else <iffalse> end' as an alternative syntax to expr ? iftrue : iffalse 2008-08-12 16:40:51 +00:00			`rule<ScannerT, parser_context<>, parser_tag<pid_ifthen> > if_then_expr;`
- untemplatise parser and ast, force Unicode (UChar32) only - set an svn:ignore 2008-08-12 14:02:33 +00:00
			`rule<ScannerT, parser_context<>, parser_tag<pid_function> > function;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_tern_expr> > tern_expr;`
			`rule<ScannerT, parser_context<>, parser_tag<pid_string> > string;`
			`};`
			`};`

			`expressor::expressor()`
			`{`
			`grammar_ = new parser_grammar(state_);`
			`/*`
			`* We provide a couple of standard variables everyone wants.`
			`*/`
			`add_variable(make_astring<UChar32>("true"), afp::u32datum::from_int(true));`
			`add_variable(make_astring<UChar32>("false"), afp::u32datum::from_int(false));`

			`/*`
			`* The cast functions.`
			`*/`
			`add_function(make_astring<UChar32>("int"), &f_int<UChar32>);`
			`add_function(make_astring<UChar32>("string"), &f_string<UChar32>);`
			`add_function(make_astring<UChar32>("float"), &f_float<UChar32>);`
			`}`

			`expressor::~expressor()`
			`{`
			`delete grammar_;`
			`}`

			`/*`
			`* The user interface to evaluate an expression. It returns the result, or`
			`* throws an exception if an error occurs.`
			`*/`
			`u32datum`
			`expressor::evaluate(u32fray const &filter) const`
			`{`
			`using namespace boost::spirit;`

			`typedef u32fray::const_iterator iterator_t;`

			`u32datum ret;`

			`tree_parse_info<iterator_t> info = ast_parse(filter.begin(), filter.end(), *grammar_ >> end_p,`
			`chset<>("\r\n\t ") \| comment_p("/", "/"));`

			`if (info.full) {`
			`ast_evaluator ae(state_);`
			`return ae.tree_eval(info.trees.begin());`
			`} else {`
			`throw parse_error("parsing failed");`
			`}`
			`}`

			`void`
			`expressor::print_xml(std::ostream &strm, u32fray const &filter) const`
			`{`
			`using namespace boost::spirit;`

			`typedef u32fray::const_iterator iterator_t;`

			`tree_parse_info<iterator_t> info = ast_parse(filter.begin(), filter.end(), *grammar_ >> end_p,`
			`+chset<>("\n\t ") \| comment_p("/", "/"));`

			`if (info.full) {`
			`std::map<parser_id, std::string> rule_names;`
			`rule_names[pid_value] = "value";`
			`rule_names[pid_variable] = "variable";`
			`rule_names[pid_basic] = "basic";`
			`rule_names[pid_bool_expr] = "bool_expr";`
			`rule_names[pid_ord_expr] = "ord_expr";`
			`rule_names[pid_eq_expr] = "eq_expr";`
			`rule_names[pid_pow_expr] = "pow_expr";`
			`rule_names[pid_mult_expr] = "mult_expr";`
			`rule_names[pid_plus_expr] = "plus_expr";`
			`rule_names[pid_in_expr] = "in_expr";`
			`rule_names[pid_function] = "function";`
			`rule_names[pid_tern_expr] = "tern_expr";`
			`rule_names[pid_string] = "string";`
			`rule_names[pid_date] = "date";`
			`rule_names[pid_time_unit] = "time_unit";`
			`rule_names[pid_comma_expr] = "comma_expr";`
add 'if <expr> then <iftrue> else <iffalse> end' as an alternative syntax to expr ? iftrue : iffalse 2008-08-12 16:40:51 +00:00			`rule_names[pid_ifthen] = "if_then_expr";`
- untemplatise parser and ast, force Unicode (UChar32) only - set an svn:ignore 2008-08-12 14:02:33 +00:00			`tree_to_xml(strm, info.trees, "", rule_names);`
			`} else {`
			`throw parse_error("parsing failed");`
			`}`
			`}`

			`void`
			`expressor::clear()`
			`{`
			`clear_variables();`
			`clear_functions();`
			`}`

			`void`
			`expressor::clear_variables()`
			`{`
			`symbols<u32datum, UChar32> variables;`
			`state_.variables = variables;`
			`}`

			`void`
			`expressor::clear_functions()`
			`{`
			`symbols<boost::function<u32datum (std::vector<u32datum>)>, UChar32> functions;`
			`state_.functions = functions;`
			`}`

			`void`
			`expressor::add_variable(u32fray const &name, u32datum const &value)`
			`{`
			`grammar_->add_variable(name, value);`
			`}`

			`void`
			`expressor::add_function(u32fray const &name, func_t value)`
			`{`
			`grammar_->add_function(name, value);`
			`}`

			`} // namespace afp`