mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-11-23 13:46:48 +00:00
Support named capturing groups in get_matches()
AF rules don't support associative arrays, so the named capturing groups are provided in the array only by their numeric keys.

Bug: T374294
Change-Id: I53b39917e6677f3a5b8f68bcf0faebf48668ea27
This commit is contained in:
parent
65c10f5fa0
commit
69ea21dc99
|
@ -979,13 +979,18 @@ class FilterEvaluator {
|
|||
// This way we can return a fixed-dimension array, much easier to manage.
|
||||
// ToDo: Find a better way to do this.
|
||||
// First, strip away escaped parentheses
|
||||
$sanitized = preg_replace( '/(\\\\\\\\)*\\\\\(/', '', $needle );
|
||||
$sanitized = preg_replace( '/((\\\\\\\\)*)\\\\\(/', '$1', $needle );
|
||||
|
||||
// Then strip starting parentheses of non-capturing groups, including
|
||||
// atomics, lookaheads and so on, even though not all of them are supported.
|
||||
$sanitized = str_replace( '(?', '', $sanitized );
|
||||
// Avoid stripping named capturing groups: (?P<name>), (?<name>) and (?'name')
|
||||
$sanitized = preg_replace( '/\(\?(?!P?<[a-zA-Z_][a-zA-Z0-9_]*>|\'[a-zA-Z_][a-zA-Z0-9_]*\')/', '', $sanitized );
|
||||
|
||||
// And also strip "(*", used with backtracking verbs like (*FAIL)
|
||||
$sanitized = str_replace( '(*', '', $sanitized );
|
||||
// Finally create an array of falses with dimension = # of capturing groups
|
||||
|
||||
// Finally create an array of falses with dimension = # of capturing groups + 1
|
||||
// (as there is also the 0 element, which contains the whole match)
|
||||
$groupscount = substr_count( $sanitized, '(' ) + 1;
|
||||
$falsy = array_fill( 0, $groupscount, false );
|
||||
|
||||
|
@ -1003,6 +1008,11 @@ class FilterEvaluator {
|
|||
);
|
||||
}
|
||||
|
||||
// Named capturing groups add the capture twice: with a numeric key and with a string key.
|
||||
// AF doesn't provide associative arrays, thus we have to filter out the elements with string keys,
|
||||
// else AFPData::newFromPHPVar would erroneously insert them into the final array, with numeric keys.
|
||||
$matches = array_filter( $matches, 'is_int', ARRAY_FILTER_USE_KEY );
|
||||
|
||||
// Returned array has non-empty positions identical to the ones returned
|
||||
// by the third parameter of a standard preg_match call ($matches in this case).
|
||||
// We want a union with $falsy so that a fixed-dimension array is returned.
|
||||
|
|
|
@ -2,4 +2,6 @@ get_matches('I am a (dog|cat)', 'What did you say?') === [ false, false ] &
|
|||
get_matches('The (truth|pineapple) is (?:rarely)? pure and (nee*v(ah|er) sh?imple)', 'The truth is rarely pure and never simple, Wilde said') == ['The truth is rarely pure and never simple', 'truth', 'never simple', 'er'] &
|
||||
get_matches('You say (.*) \(and I say (.*)\)\.', 'You say hello (and I say goodbye).') === [ 'You say hello (and I say goodbye).', 'hello', 'goodbye' ] &
|
||||
get_matches('I(?: am)? the ((walrus|egg man).*)\!', 'I am the egg man, I am the walrus !') === [ 'I am the egg man, I am the walrus !', 'egg man, I am the walrus ', 'egg man' ] &
|
||||
get_matches('this (does) not match', 'foo bar') === [ false, false ]
|
||||
get_matches('this (does) not match', 'foo bar') === [ false, false ] &
|
||||
get_matches('(?P<name1>foo) (?<name2>bar) (?\'name3\'baz)', 'foo bar baz') === [ 'foo bar baz', 'foo', 'bar', 'baz' ] &
|
||||
get_matches('(?P<name1>foo) (?<name2>bar) (?\'name3\'baz)', 'lorem ipsum') === [ false, false, false, false ]
|
||||
|
|
Loading…
Reference in a new issue