From 69ea21dc99cf1d268cf8804d6e2155d48adbd992 Mon Sep 17 00:00:00 2001 From: Anne Haunime Date: Sat, 7 Sep 2024 11:25:48 +0000 Subject: [PATCH] Support named capturing groups in get_matches() AF rules don't support associative arrays, so the named capturing groups are provided in the array only by their numeric keys. Bug: T374294 Change-Id: I53b39917e6677f3a5b8f68bcf0faebf48668ea27 --- includes/Parser/FilterEvaluator.php | 16 +++++++++++++--- tests/parserTests/get-matches.t | 4 +++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/includes/Parser/FilterEvaluator.php b/includes/Parser/FilterEvaluator.php index 32e54cda3..c2199e348 100644 --- a/includes/Parser/FilterEvaluator.php +++ b/includes/Parser/FilterEvaluator.php @@ -979,13 +979,18 @@ class FilterEvaluator { // This way we can return a fixed-dimension array, much easier to manage. // ToDo: Find a better way to do this. // First, strip away escaped parentheses - $sanitized = preg_replace( '/(\\\\\\\\)*\\\\\(/', '', $needle ); + $sanitized = preg_replace( '/((\\\\\\\\)*)\\\\\(/', '$1', $needle ); + // Then strip starting parentheses of non-capturing groups, including // atomics, lookaheads and so on, even if not every of them is supported. - $sanitized = str_replace( '(?', '', $sanitized ); + // Avoid stripping named capturing groups: (?P<name>), (?<name>) 
and (?'name') + $sanitized = preg_replace( '/\(\?(?!P?<[a-zA-Z_][a-zA-Z0-9_]*>|\'[a-zA-Z_][a-zA-Z0-9_]*\')/', '', $sanitized ); + // And also strip "(*", used with backtracking verbs like (*FAIL) $sanitized = str_replace( '(*', '', $sanitized ); - // Finally create an array of falses with dimension = # of capturing groups + + // Finally create an array of falses with dimension = # of capturing groups + 1 + // (as there is also the 0 element, which contains the whole match) $groupscount = substr_count( $sanitized, '(' ) + 1; $falsy = array_fill( 0, $groupscount, false ); @@ -1003,6 +1008,11 @@ class FilterEvaluator { ); } + // Named capturing groups add the capture twice: with a numeric key and with a string key. + // AF doesn't provide associative arrays, thus we have to filter out the elements with string keys, + // else AFPData::newFromPHPVar would erroneously insert them into the final array, with numeric keys. + $matches = array_filter( $matches, 'is_int', ARRAY_FILTER_USE_KEY ); + // Returned array has non-empty positions identical to the ones returned // by the third parameter of a standard preg_match call ($matches in this case). // We want an union with falsy to return a fixed-dimension array. diff --git a/tests/parserTests/get-matches.t b/tests/parserTests/get-matches.t index 45c4ac857..b2e92cc72 100644 --- a/tests/parserTests/get-matches.t +++ b/tests/parserTests/get-matches.t @@ -2,4 +2,6 @@ get_matches('I am a (dog|cat)', 'What did you say?') === [ false, false ] & get_matches('The (truth|pineapple) is (?:rarely)? pure and (nee*v(ah|er) sh?imple)', 'The truth is rarely pure and never simple, Wilde said') == ['The truth is rarely pure and never simple', 'truth', 'never simple', 'er'] & get_matches('You say (.*) \(and I say (.*)\)\.', 'You say hello (and I say goodbye).') === [ 'You say hello (and I say goodbye).', 'hello', 'goodbye' ] & get_matches('I(?: am)? 
the ((walrus|egg man).*)\!', 'I am the egg man, I am the walrus !') === [ 'I am the egg man, I am the walrus !', 'egg man, I am the walrus ', 'egg man' ] & -get_matches('this (does) not match', 'foo bar') === [ false, false ] \ No newline at end of file +get_matches('this (does) not match', 'foo bar') === [ false, false ] & +get_matches('(?P<name1>foo) (?<name2>bar) (?\'name3\'baz)', 'foo bar baz') === [ 'foo bar baz', 'foo', 'bar', 'baz' ] & +get_matches('(?P<name1>foo) (?<name2>bar) (?\'name3\'baz)', 'lorem ipsum') === [ false, false, false, false ]