Support named capturing groups in get_matches()

AF rules don't support associative arrays, so the named capturing groups are provided in the array only by their numeric keys.

Bug: T374294
Change-Id: I53b39917e6677f3a5b8f68bcf0faebf48668ea27
This commit is contained in:
Anne Haunime 2024-09-07 11:25:48 +00:00 committed by [[mw:User:Od1n]]
parent 65c10f5fa0
commit 69ea21dc99
2 changed files with 16 additions and 4 deletions

View file

@ -979,13 +979,18 @@ class FilterEvaluator {
// This way we can return a fixed-dimension array, much easier to manage.
// ToDo: Find a better way to do this.
// First, strip away escaped parentheses
$sanitized = preg_replace( '/(\\\\\\\\)*\\\\\(/', '', $needle );
$sanitized = preg_replace( '/((\\\\\\\\)*)\\\\\(/', '$1', $needle );
// Then strip starting parentheses of non-capturing groups, including
// atomics, lookaheads and so on, even if not all of them are supported.
$sanitized = str_replace( '(?', '', $sanitized );
// Avoid stripping named capturing groups: (?P<name>), (?<name>) and (?'name')
$sanitized = preg_replace( '/\(\?(?!P?<[a-zA-Z_][a-zA-Z0-9_]*>|\'[a-zA-Z_][a-zA-Z0-9_]*\')/', '', $sanitized );
// And also strip "(*", used with backtracking verbs like (*FAIL)
$sanitized = str_replace( '(*', '', $sanitized );
// Finally create an array of falses with dimension = # of capturing groups
// Finally create an array of falses with dimension = # of capturing groups + 1
// (as there is also the 0 element, which contains the whole match)
$groupscount = substr_count( $sanitized, '(' ) + 1;
$falsy = array_fill( 0, $groupscount, false );
@ -1003,6 +1008,11 @@ class FilterEvaluator {
);
}
// Named capturing groups add the capture twice: with a numeric key and with a string key.
// AF doesn't provide associative arrays, thus we have to filter out the elements with string keys,
// else AFPData::newFromPHPVar would erroneously insert them into the final array, with numeric keys.
$matches = array_filter( $matches, 'is_int', ARRAY_FILTER_USE_KEY );
// Returned array has non-empty positions identical to the ones returned
// by the third parameter of a standard preg_match call ($matches in this case).
// We want a union with $falsy to return a fixed-dimension array.

View file

@ -2,4 +2,6 @@ get_matches('I am a (dog|cat)', 'What did you say?') === [ false, false ] &
get_matches('The (truth|pineapple) is (?:rarely)? pure and (nee*v(ah|er) sh?imple)', 'The truth is rarely pure and never simple, Wilde said') == ['The truth is rarely pure and never simple', 'truth', 'never simple', 'er'] &
get_matches('You say (.*) \(and I say (.*)\)\.', 'You say hello (and I say goodbye).') === [ 'You say hello (and I say goodbye).', 'hello', 'goodbye' ] &
get_matches('I(?: am)? the ((walrus|egg man).*)\!', 'I am the egg man, I am the walrus !') === [ 'I am the egg man, I am the walrus !', 'egg man, I am the walrus ', 'egg man' ] &
get_matches('this (does) not match', 'foo bar') === [ false, false ]
get_matches('this (does) not match', 'foo bar') === [ false, false ] &
get_matches('(?P<name1>foo) (?<name2>bar) (?\'name3\'baz)', 'foo bar baz') === [ 'foo bar baz', 'foo', 'bar', 'baz' ] &
get_matches('(?P<name1>foo) (?<name2>bar) (?\'name3\'baz)', 'lorem ipsum') === [ false, false, false, false ]