mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-11-27 15:30:42 +00:00
improved string literal parser. now \xHH works as expected (only matches two digits), and \uHHHH, \UHHHHHHHH are supported (16/32-bit Unicode characters)
This commit is contained in:
parent
9911042c66
commit
10e89fda5c
|
@ -12,6 +12,32 @@
|
|||
#ifndef AST_H
|
||||
#define AST_H
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * Convert a fixed-width run of hexadecimal digits to an integer.
 *
 * @param str      pointer to the first digit; at least `ndigits` characters
 *                 must be readable from it.
 * @param ndigits  number of characters to consume.  Zero or a negative
 *                 count consumes nothing and yields 0.
 * @return the value of the digits, read most-significant first.  A
 *         character that is not one of [0-9a-fA-F] contributes 0 for its
 *         position; no error is reported, so callers that need strict
 *         validation must check the input themselves.
 */
template<typename charT>
int
hex2int(charT const *str, int ndigits)
{
    /*
     * Accumulate in an unsigned type: eight digits can set the top bit of
     * a 32-bit value, and multiplying that into a signed int would be
     * undefined behaviour.  Unsigned arithmetic simply wraps.
     */
    unsigned int value = 0;

    for (; ndigits > 0; --ndigits, ++str) {
        charT const c = *str;

        value *= 0x10u;

        if (c >= 'a' && c <= 'f')
            value += 10u + static_cast<unsigned int>(c - 'a');
        else if (c >= 'A' && c <= 'F')
            value += 10u + static_cast<unsigned int>(c - 'A');
        else if (c >= '0' && c <= '9')
            value += static_cast<unsigned int>(c - '0');
        /* any other character leaves this digit position as 0 */
    }

    /*
     * Values above INT_MAX map onto the corresponding negative int
     * (modular conversion), keeping the bit pattern the caller expects.
     */
    return static_cast<int>(value);
}
|
||||
|
||||
}
|
||||
|
||||
namespace afp {
|
||||
|
||||
template<typename T> struct parser_grammar;
|
||||
|
@ -234,6 +260,27 @@ ast_evaluator<charT, iterator>::ast_eval_string(basic_fray<charT> const &s)
|
|||
case 'v':
|
||||
ret.push_back('\v');
|
||||
break;
|
||||
case 'x':
|
||||
if (i + 3 >= end)
|
||||
break;
|
||||
ret.push_back(hex2int(s.data() + i + 2, 2));
|
||||
i += 2;
|
||||
break;
|
||||
|
||||
case 'u':
|
||||
if (i + 5 >= end)
|
||||
break;
|
||||
ret.push_back(hex2int(s.data() + i + 2, 4));
|
||||
i += 4;
|
||||
break;
|
||||
|
||||
case 'U':
|
||||
if (i + 9 >= end)
|
||||
break;
|
||||
ret.push_back(hex2int(s.data() + i + 2, 8));
|
||||
i += 8;
|
||||
break;
|
||||
|
||||
default:
|
||||
ret.push_back(s[i + 1]);
|
||||
break;
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <boost/spirit/core.hpp>
|
||||
#include <boost/spirit/utility/confix.hpp>
|
||||
#include <boost/spirit/utility/chset.hpp>
|
||||
#include <boost/spirit/utility/loops.hpp>
|
||||
#include <boost/spirit/tree/ast.hpp>
|
||||
#include <boost/spirit/tree/tree_to_xml.hpp>
|
||||
#include <boost/spirit/symbols.hpp>
|
||||
|
@ -211,13 +212,28 @@ struct parser_grammar : public grammar<parser_grammar<charT> >
|
|||
| string
|
||||
;
|
||||
|
||||
hexchar = chset<>("a-fA-F0-9")
|
||||
;
|
||||
|
||||
octchar = chset<>("0-7")
|
||||
;
|
||||
|
||||
c_string_char =
|
||||
"\\x" >> hexchar >> hexchar
|
||||
| "\\u" >> repeat_p(4)[hexchar]
|
||||
| "\\U" >> repeat_p(8)[hexchar]
|
||||
| "\\o" >> octchar >> octchar >> octchar
|
||||
| "\\" >> anychar_p - (ch_p('x') | 'u' | 'o')
|
||||
| anychar_p - (ch_p('"') | '\\')
|
||||
;
|
||||
|
||||
/*
|
||||
* confix_p can't be used here, because it will rewrite
|
||||
* *(c_escape_ch_p[x]) into (*c_escape_ch_p)[x]
|
||||
*/
|
||||
string = inner_node_d[
|
||||
'"'
|
||||
>> leaf_node_d[ *(lex_escape_ch_p - '"') ]
|
||||
>> leaf_node_d[ *(c_string_char) ]
|
||||
>> '"'
|
||||
]
|
||||
;
|
||||
|
@ -341,6 +357,7 @@ struct parser_grammar : public grammar<parser_grammar<charT> >
|
|||
return tern_expr;
|
||||
}
|
||||
|
||||
rule<ScannerT> c_string_char, hexchar, octchar;
|
||||
rule<ScannerT, parser_context<>, parser_tag<id_value> > value;
|
||||
rule<ScannerT, parser_context<>, parser_tag<id_variable> > variable;
|
||||
rule<ScannerT, parser_context<>, parser_tag<id_basic> > basic;
|
||||
|
|
Loading…
Reference in a new issue