From 7bc70d116eb8bfb8f9375542db78367661f63ce8 Mon Sep 17 00:00:00 2001 From: Daimona Eaytoy Date: Wed, 30 Oct 2019 13:36:19 +0100 Subject: [PATCH] Use PHP regexps instead of SQL to filter on Special:AbuseFilter As the code comment says, and as it was suggested in Iafe54285384bc28b3e8812b495166f2682d4571c, we were validating the provided regexp as PCRE, but using it in SQL, which only supports POSIX. Furthermore, we won't have to worry about cross-DBMS compat anymore. Bug: T193068 Change-Id: If6d8717795b6c1dcf619a23363eb6144902cfaed --- i18n/en.json | 1 + i18n/qqq.json | 1 + includes/Views/AbuseFilterViewList.php | 72 +++----- includes/pagers/AbuseFilterPager.php | 197 ++++++++++++++------- includes/pagers/GlobalAbuseFilterPager.php | 2 +- 5 files changed, 164 insertions(+), 109 deletions(-) diff --git a/i18n/en.json b/i18n/en.json index a6ae35495..142b1d223 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -159,6 +159,7 @@ "abusefilter-list-options-search-like": "Plain query", "abusefilter-list-options-search-rlike": "Regular expression", "abusefilter-list-options-search-irlike": "Case-insensitive regular expression", + "abusefilter-list-invalid-searchmode": "The specified search mode is not valid.", "abusefilter-list-regexerror": "An error has occurred while searching: Regular expression syntax error.", "abusefilter-list-options-submit": "Update", "abusefilter-tools-text": "Here are some tools which may be useful in formulating and debugging abuse filters.", diff --git a/i18n/qqq.json b/i18n/qqq.json index b029c94d0..2f02e47be 100644 --- a/i18n/qqq.json +++ b/i18n/qqq.json @@ -193,6 +193,7 @@ "abusefilter-list-options-search-like": "Radio button label in filter form.", "abusefilter-list-options-search-rlike": "Radio button label in filter form. See [[w:en:regular expression]]", "abusefilter-list-options-search-irlike": "Radio button label in filter form. See [[w:en:regular expression]]", + "abusefilter-list-invalid-searchmode": "Error message text.", "abusefilter-list-regexerror": "Error message text.", "abusefilter-list-options-submit": "Submit button text in filter form to update a filtered list.\n{{Identical|Update}}", "abusefilter-tools-text": "Introduction test for abuse filter tools.", diff --git a/includes/Views/AbuseFilterViewList.php b/includes/Views/AbuseFilterViewList.php index e8376cfc0..4653e9f6c 100644 --- a/includes/Views/AbuseFilterViewList.php +++ b/includes/Views/AbuseFilterViewList.php @@ -54,7 +54,7 @@ class AbuseFilterViewList extends AbuseFilterView { $scope === 'global' ); if ( $searchEnabled ) { - $querypattern = $request->getVal( 'querypattern' ); + $querypattern = $request->getVal( 'querypattern', '' ); $searchmode = $request->getVal( 'searchoption', 'LIKE' ); } else { $querypattern = ''; @@ -84,51 +84,31 @@ class AbuseFilterViewList extends AbuseFilterView { $conds['af_global'] = 1; } - $dbr = wfGetDB( DB_REPLICA ); - if ( $querypattern !== '' ) { - if ( $searchmode !== 'LIKE' ) { - if ( !StringUtils::isValidPCRERegex( "/$querypattern/" ) ) { - $out->addHTML( - Xml::tags( - 'p', - null, - Html::errorBox( $this->msg( 'abusefilter-list-regexerror' )->parse() ) - ) - ); - $this->showList( - [ 'af_deleted' => 0 ], - compact( - 'deleted', - 'furtherOptions', - 'querypattern', - 'searchmode', - 'scope', - 'searchEnabled' - ) - ); - return; - } - if ( $searchmode === 'RLIKE' ) { - $conds[] = 'af_pattern RLIKE ' . - $dbr->addQuotes( $querypattern ); - } else { - $conds[] = 'LOWER( CAST( af_pattern AS char ) ) RLIKE ' . - strtolower( $dbr->addQuotes( $querypattern ) ); - } - } else { - // Build like query escaping tokens and encapsulating in % to search everywhere - $conds[] = 'LOWER( CAST( af_pattern AS char ) ) ' . - $dbr->buildLike( - $dbr->anyString(), - strtolower( $querypattern ), - $dbr->anyString() - ); + // Check the search pattern. Filtering the results is done in AbuseFilterPager + $error = null; + if ( !in_array( $searchmode, [ 'LIKE', 'RLIKE', 'IRLIKE' ] ) ) { + $error = 'abusefilter-list-invalid-searchmode'; + } elseif ( $searchmode !== 'LIKE' && !StringUtils::isValidPCRERegex( "/$querypattern/" ) ) { + $error = 'abusefilter-list-regexerror'; + } + + if ( $error !== null ) { + $out->addHTML( + Xml::tags( + 'p', + null, + Html::errorBox( $this->msg( $error )->escaped() ) + ) + ); + + // Reset the conditions in case of error + $conds = [ 'af_deleted' => 0 ]; + $querypattern = ''; } } $this->showList( - $conds, compact( 'deleted', 'furtherOptions', @@ -136,15 +116,16 @@ class AbuseFilterViewList extends AbuseFilterView { 'searchmode', 'scope', 'searchEnabled' - ) + ), + $conds ); } /** - * @param array $conds * @param array $optarray + * @param array $conds */ - public function showList( $conds = [ 'af_deleted' => 0 ], $optarray = [] ) { + private function showList( array $optarray, array $conds = [ 'af_deleted' => 0 ] ) { $config = $this->getConfig(); $this->getOutput()->addHTML( Xml::tags( 'h2', null, $this->msg( 'abusefilter-list' )->parse() ) @@ -172,7 +153,8 @@ class AbuseFilterViewList extends AbuseFilterView { $this, $conds, $this->linkRenderer, - [ $querypattern, $searchmode ] + $querypattern, + $searchmode ); } diff --git a/includes/pagers/AbuseFilterPager.php b/includes/pagers/AbuseFilterPager.php index ba6dae1f7..c34e3b80c 100644 --- a/includes/pagers/AbuseFilterPager.php +++ b/includes/pagers/AbuseFilterPager.php @@ -21,23 +21,33 @@ class AbuseFilterPager extends TablePager { */ public $mConds; /** - * @var string[] Info used for searching patterns. The first element is the specified pattern, - * the second is the search mode (LIKE, RLIKE or IRLIKE) + * @var string The pattern being searched */ - public $mQuery; + private $mSearchPattern; + /** + * @var string The pattern search mode (LIKE, RLIKE or IRLIKE) + */ + private $mSearchMode; /** * @param AbuseFilterViewList $page * @param array $conds * @param LinkRenderer $linkRenderer - * @param array $query + * @param string $searchPattern Empty string if no pattern was specified + * @param string $searchMode */ - public function __construct( AbuseFilterViewList $page, $conds, LinkRenderer $linkRenderer, - $query ) { + public function __construct( + AbuseFilterViewList $page, + $conds, + LinkRenderer $linkRenderer, + string $searchPattern, + string $searchMode + ) { $this->mPage = $page; $this->mConds = $conds; $this->linkRenderer = $linkRenderer; - $this->mQuery = $query; + $this->mSearchPattern = $searchPattern; + $this->mSearchMode = $searchMode; parent::__construct( $this->mPage->getContext() ); } @@ -68,6 +78,61 @@ class AbuseFilterPager extends TablePager { ]; } + /** + * @inheritDoc + * This is the same as the parent implementation if no search pattern was specified. + * Otherwise, it does a query with no limit and then slices the results à la ContribsPager. + */ + public function reallyDoQuery( $offset, $limit, $order ) { + if ( !strlen( $this->mSearchPattern ) ) { + return parent::reallyDoQuery( $offset, $limit, $order ); + } + + list( $tables, $fields, $conds, $fname, $options, $join_conds ) = + $this->buildQueryInfo( $offset, $limit, $order ); + + unset( $options['LIMIT'] ); + $res = $this->mDb->select( $tables, $fields, $conds, $fname, $options, $join_conds ); + + $filtered = []; + foreach ( $res as $row ) { + if ( $this->matchesPattern( $row->af_pattern ) ) { + $filtered[ $row->af_id ] = $row; + } + } + + // sort results and enforce limit like ContribsPager + if ( $order === self::QUERY_ASCENDING ) { + ksort( $filtered ); + } else { + krsort( $filtered ); + } + $filtered = array_slice( $filtered, 0, $limit ); + $filtered = array_values( $filtered ); + return new FakeResultWrapper( $filtered ); + } + + /** + * Check whether $subject matches the given $pattern. + * + * @param string $subject + * @return bool + * @throws LogicException + */ + private function matchesPattern( $subject ) { + $pattern = $this->mSearchPattern; + switch ( $this->mSearchMode ) { + case 'RLIKE': + return (bool)preg_match( "/$pattern/u", $subject ); + case 'IRLIKE': + return (bool)preg_match( "/$pattern/ui", $subject ); + case 'LIKE': + return mb_stripos( $subject, $pattern ) !== false; + default: + throw new LogicException( "Unknown search type {$this->mSearchMode}" ); + } + } + /** * @see Pager::getFieldNames() * @return array @@ -93,7 +158,7 @@ class AbuseFilterPager extends TablePager { $headers['af_hit_count'] = 'abusefilter-list-hitcount'; } - if ( AbuseFilter::canViewPrivate( $user ) && !empty( $this->mQuery[0] ) ) { + if ( AbuseFilter::canViewPrivate( $user ) && $this->mSearchPattern !== '' ) { $headers['af_pattern'] = 'abusefilter-list-pattern'; } @@ -125,60 +190,7 @@ class AbuseFilterPager extends TablePager { $lang->formatNum( intval( $value ) ) ); case 'af_pattern': - if ( $this->mQuery[1] === 'LIKE' ) { - $position = mb_stripos( $row->af_pattern, $this->mQuery[0] ); - if ( $position === false ) { - // This may happen due to problems with character encoding - // which aren't easy to solve - return htmlspecialchars( mb_substr( $row->af_pattern, 0, 50 ) ); - } - $length = mb_strlen( $this->mQuery[0] ); - } else { - $regex = '/' . $this->mQuery[0] . '/u'; - if ( $this->mQuery[1] === 'IRLIKE' ) { - $regex .= 'i'; - } - - $matches = []; - Wikimedia\suppressWarnings(); - $check = preg_match( - $regex, - $row->af_pattern, - $matches - ); - Wikimedia\restoreWarnings(); - // This may happen in case of catastrophic backtracking - if ( $check === false ) { - return htmlspecialchars( mb_substr( $row->af_pattern, 0, 50 ) ); - } - - $length = mb_strlen( $matches[0] ); - $position = mb_strpos( $row->af_pattern, $matches[0] ); - } - - $remaining = 50 - $length; - if ( $remaining <= 0 ) { - // Truncate the filter pattern and only show the first 50 characters of the match - $pattern = '' . - htmlspecialchars( mb_substr( $row->af_pattern, $position, 50 ) ) . - ''; - } else { - // Center the snippet on the matched string - $minoffset = max( $position - round( $remaining / 2 ), 0 ); - $pattern = mb_substr( $row->af_pattern, $minoffset, 50 ); - $pattern = - htmlspecialchars( mb_substr( $pattern, 0, $position - $minoffset ) ) . - '' . - htmlspecialchars( mb_substr( $pattern, $position - $minoffset, $length ) ) . - '' . - htmlspecialchars( mb_substr( - $pattern, - $position - $minoffset + $length, - $remaining - ( $position - $minoffset + $length ) - ) - ); - } - return $pattern; + return $this->getHighlightedPattern( $row ); case 'af_public_comments': return $this->linkRenderer->makeLink( SpecialPage::getTitleFor( 'AbuseFilter', intval( $row->af_id ) ), @@ -257,7 +269,7 @@ class AbuseFilterPager extends TablePager { ) )->params( wfEscapeWikiText( $row->af_user_text ) - )->parse(); + )->parse(); case 'af_group': return AbuseFilter::nameGroup( $value ); default: @@ -265,6 +277,65 @@ class AbuseFilterPager extends TablePager { } } + /** + * Get the filter pattern with elements surrounding the searched pattern + * + * @param stdClass $row + * @return string + */ + private function getHighlightedPattern( stdClass $row ) { + $maxLen = 50; + if ( $this->mSearchMode === 'LIKE' ) { + $position = mb_stripos( $row->af_pattern, $this->mSearchPattern ); + $length = mb_strlen( $this->mSearchPattern ); + } else { + $regex = '/' . $this->mSearchPattern . '/u'; + if ( $this->mSearchMode === 'IRLIKE' ) { + $regex .= 'i'; + } + + $matches = []; + Wikimedia\suppressWarnings(); + $check = preg_match( + $regex, + $row->af_pattern, + $matches + ); + Wikimedia\restoreWarnings(); + // This may happen in case of catastrophic backtracking, or regexps matching + // the empty string. + if ( $check === false || strlen( $matches[0] ) === 0 ) { + return htmlspecialchars( mb_substr( $row->af_pattern, 0, 50 ) ); + } + + $length = mb_strlen( $matches[0] ); + $position = mb_strpos( $row->af_pattern, $matches[0] ); + } + + $remaining = $maxLen - $length; + if ( $remaining <= 0 ) { + $pattern = '' . + htmlspecialchars( mb_substr( $row->af_pattern, $position, $maxLen ) ) . + ''; + } else { + // Center the snippet on the matched string + $minoffset = max( $position - round( $remaining / 2 ), 0 ); + $pattern = mb_substr( $row->af_pattern, $minoffset, $maxLen ); + $pattern = + htmlspecialchars( mb_substr( $pattern, 0, $position - $minoffset ) ) . + '' . + htmlspecialchars( mb_substr( $pattern, $position - $minoffset, $length ) ) . + '' . + htmlspecialchars( mb_substr( + $pattern, + $position - $minoffset + $length, + $remaining - ( $position - $minoffset + $length ) + ) + ); + } + return $pattern; + } + /** * @return string */ diff --git a/includes/pagers/GlobalAbuseFilterPager.php b/includes/pagers/GlobalAbuseFilterPager.php index 983e912eb..7cf13cf9a 100644 --- a/includes/pagers/GlobalAbuseFilterPager.php +++ b/includes/pagers/GlobalAbuseFilterPager.php @@ -12,7 +12,7 @@ class GlobalAbuseFilterPager extends AbuseFilterPager { * @param LinkRenderer $linkRenderer */ public function __construct( AbuseFilterViewList $page, $conds, LinkRenderer $linkRenderer ) { - parent::__construct( $page, $conds, $linkRenderer, [ '', 'LIKE' ] ); + parent::__construct( $page, $conds, $linkRenderer, '', 'LIKE' ); $this->mDb = wfGetDB( DB_REPLICA, [], $this->getConfig()->get( 'AbuseFilterCentralDB' ) ); }