diff --git a/extension.json b/extension.json index e2fe242f3..823053d67 100644 --- a/extension.json +++ b/extension.json @@ -163,6 +163,7 @@ "AbuseFilterVariableHolder": "includes/AbuseFilterVariableHolder.php", "MediaWiki\\Extension\\AbuseFilter\\KeywordsManager": "includes/KeywordsManager.php", "MediaWiki\\Extension\\AbuseFilter\\AbuseFilterServices": "includes/AbuseFilterServices.php", + "MediaWiki\\Extension\\AbuseFilter\\FilterProfiler": "includes/FilterProfiler.php", "AFComputedVariable": "includes/AFComputedVariable.php", "AFPData": "includes/parser/AFPData.php", "AFPException": "includes/parser/AFPException.php", diff --git a/includes/AbuseFilter.php b/includes/AbuseFilter.php index 21e111f20..816e8214b 100644 --- a/includes/AbuseFilter.php +++ b/includes/AbuseFilter.php @@ -166,42 +166,6 @@ class AbuseFilter { return $runner->checkAllFilters(); } - /** - * @param int|string $filter - * @internal - */ - public static function resetFilterProfile( $filter ) { - $stash = MediaWikiServices::getInstance()->getMainObjectStash(); - $profileKey = self::filterProfileKey( $filter ); - - $stash->delete( $profileKey ); - } - - /** - * Retrieve per-filter statistics. - * - * @param string $filter - * @return array - */ - public static function getFilterProfile( $filter ) { - $stash = MediaWikiServices::getInstance()->getMainObjectStash(); - $profile = $stash->get( self::filterProfileKey( $filter ) ); - - if ( $profile !== false ) { - $curCount = $profile['count']; - $curTotalTime = $profile['total-time']; - $curTotalConds = $profile['total-cond']; - } else { - return [ 0, 0, 0, 0 ]; - } - - // Return in milliseconds, rounded to 2dp - $avgTime = round( $curTotalTime / $curCount, 2 ); - $avgCond = round( $curTotalConds / $curCount, 1 ); - - return [ $curCount, $profile['matches'], $avgTime, $avgCond ]; - } - /** * Utility function to split "$index" to an array [ $id, $global ], where * $id is $index casted to int, and $global is a boolean: true if the filter is global, @@ -705,28 +669,6 @@ class AbuseFilter { return $value[$group] ?? $value['default']; } - /** - * Get the memcache access key used to store per-filter profiling data. - * - * @param string|int $filter - * @return string - */ - public static function filterProfileKey( $filter ) { - $cache = MediaWikiServices::getInstance()->getMainWANObjectCache(); - return $cache->makeKey( 'abusefilter-profile', 'v3', $filter ); - } - - /** - * Memcache access key used to store overall profiling data for rule groups - * - * @param string $group - * @return string - */ - public static function filterProfileGroupKey( $group ) { - $cache = MediaWikiServices::getInstance()->getMainWANObjectCache(); - return $cache->makeKey( 'abusefilter-profile', 'group', $group ); - } - /** * @return User */ @@ -1212,7 +1154,7 @@ class AbuseFilter { AbuseFilterHooks::purgeTagCache(); } - self::resetFilterProfile( $new_id ); + AbuseFilterServices::getFilterProfiler()->resetFilterProfile( $new_id ); return [ $new_id, $history_id ]; } diff --git a/includes/AbuseFilterRunner.php b/includes/AbuseFilterRunner.php index eca20baea..81d9fbf26 100644 --- a/includes/AbuseFilterRunner.php +++ b/includes/AbuseFilterRunner.php @@ -1,6 +1,8 @@ group = $group; $this->action = $vars->getVar( 'action' )->toString(); $this->hookRunner = AbuseFilterHookRunner::getRunner(); + $this->filterProfiler = AbuseFilterServices::getFilterProfiler(); } /** @@ -454,196 +460,21 @@ class AbuseFilterRunner { * @param string[] $allFilters */ protected function profileExecution( array $result, array $matchedFilters, array $allFilters ) { - $this->checkResetProfiling( $allFilters ); - $this->recordRuntimeProfilingResult( + $this->filterProfiler->checkResetProfiling( $this->group, $allFilters ); + $this->filterProfiler->recordRuntimeProfilingResult( count( $allFilters ), $result['condCount'], $result['runtime'] ); - $this->recordPerFilterProfiling( $result['profiling'] ); - $this->recordStats( $result['condCount'], $result['runtime'], (bool)$matchedFilters ); - } - - /** - * Check if profiling data for all filters is lesser than the limit. If not, delete it and - * also delete per-filter profiling for all filters. Note that we don't need to reset it for - * disabled filters too, as their profiling data will be reset upon re-enabling anyway. - * - * @param array $allFilters - */ - protected function checkResetProfiling( array $allFilters ) { - global $wgAbuseFilterProfileActionsCap; - - $profileKey = AbuseFilter::filterProfileGroupKey( $this->group ); - $stash = MediaWikiServices::getInstance()->getMainObjectStash(); - - $profile = $stash->get( $profileKey ); - $total = $profile['total'] ?? 0; - - if ( $total > $wgAbuseFilterProfileActionsCap ) { - $stash->delete( $profileKey ); - foreach ( $allFilters as $filter ) { - AbuseFilter::resetFilterProfile( $filter ); - } - } - } - - /** - * Record per-filter profiling, for all filters - * - * @param array $data Profiling data, as stored in $this->profilingData - * @phan-param array $data - */ - protected function recordPerFilterProfiling( array $data ) { - global $wgAbuseFilterSlowFilterRuntimeLimit; - - foreach ( $data as $filterName => $params ) { - list( $filterID, $global ) = AbuseFilter::splitGlobalName( $filterName ); - if ( !$global ) { - // @todo Maybe add a parameter to recordProfilingResult to record global filters - // data separately (in the foreign wiki) - $this->recordProfilingResult( $filterID, $params['time'], $params['conds'], $params['result'] ); - } - - if ( $params['time'] > $wgAbuseFilterSlowFilterRuntimeLimit ) { - $this->recordSlowFilter( $filterName, $params['time'], $params['conds'], $params['result'] ); - } - } - } - - /** - * Record per-filter profiling data - * - * @param int $filter - * @param float $time Time taken, in milliseconds - * @param int $conds - * @param bool $matched - */ - protected function recordProfilingResult( $filter, $time, $conds, $matched ) { - // Defer updates to avoid massive (~1 second) edit time increases - DeferredUpdates::addCallableUpdate( function () use ( $filter, $time, $conds, $matched ) { - $stash = MediaWikiServices::getInstance()->getMainObjectStash(); - $profileKey = AbuseFilter::filterProfileKey( $filter ); - $profile = $stash->get( $profileKey ); - - if ( $profile !== false ) { - // Number of observed executions of this filter - $profile['count']++; - if ( $matched ) { - // Number of observed matches of this filter - $profile['matches']++; - } - // Total time spent on this filter from all observed executions - $profile['total-time'] += $time; - // Total number of conditions for this filter from all executions - $profile['total-cond'] += $conds; - } else { - $profile = [ - 'count' => 1, - 'matches' => (int)$matched, - 'total-time' => $time, - 'total-cond' => $conds - ]; - } - // Note: It is important that all key information be stored together in a single - // memcache entry to avoid race conditions where competing Apache instances - // partially overwrite the stats. - $stash->set( $profileKey, $profile, 3600 ); - } ); - } - - /** - * Logs slow filter's runtime data for later analysis - * - * @param string $filterId - * @param float $runtime - * @param int $totalConditions - * @param bool $matched - */ - protected function recordSlowFilter( $filterId, $runtime, $totalConditions, $matched ) { - $logger = LoggerFactory::getInstance( 'AbuseFilter' ); - $logger->info( - 'Edit filter {filter_id} on {wiki} is taking longer than expected', - [ - 'wiki' => WikiMap::getCurrentWikiDbDomain()->getId(), - 'filter_id' => $filterId, - 'title' => $this->title->getPrefixedText(), - 'runtime' => $runtime, - 'matched' => $matched, - 'total_conditions' => $totalConditions - ] + $this->filterProfiler->recordPerFilterProfiling( $this->title, $result['profiling'] ); + $this->filterProfiler->recordStats( + $this->group, + $result['condCount'], + $result['runtime'], + (bool)$matchedFilters ); } - /** - * Update global statistics - * - * @param int $condsUsed The amount of used conditions - * @param float $totalTime Time taken, in milliseconds - * @param bool $anyMatch Whether at least one filter matched the action - */ - protected function recordStats( $condsUsed, $totalTime, $anyMatch ) { - $profileKey = AbuseFilter::filterProfileGroupKey( $this->group ); - $stash = MediaWikiServices::getInstance()->getMainObjectStash(); - - // Note: All related data is stored in a single memcache entry and updated via merge() - // to avoid race conditions where partial updates on competing instances corrupt the data. - $stash->merge( - $profileKey, - function ( $cache, $key, $profile ) use ( $condsUsed, $totalTime, $anyMatch ) { - global $wgAbuseFilterConditionLimit; - - if ( $profile === false ) { - $profile = [ - // Total number of actions observed - 'total' => 0, - // Number of actions ending by exceeding condition limit - 'overflow' => 0, - // Total time of execution of all observed actions - 'total-time' => 0, - // Total number of conditions from all observed actions - 'total-cond' => 0, - // Total number of filters matched - 'matches' => 0 - ]; - } - - $profile['total']++; - $profile['total-time'] += $totalTime; - $profile['total-cond'] += $condsUsed; - - // Increment overflow counter, if our condition limit overflowed - if ( $condsUsed > $wgAbuseFilterConditionLimit ) { - $profile['overflow']++; - } - - // Increment counter by 1 if there was at least one match - if ( $anyMatch ) { - $profile['matches']++; - } - - return $profile; - }, - AbuseFilter::$statsStoragePeriod - ); - } - - /** - * Record runtime profiling data for all filters together - * - * @param int $totalFilters - * @param int $totalConditions - * @param float $runtime - */ - protected function recordRuntimeProfilingResult( $totalFilters, $totalConditions, $runtime ) { - $keyPrefix = 'abusefilter.runtime-profile.' . WikiMap::getCurrentWikiDbDomain()->getId() . '.'; - - $statsd = MediaWikiServices::getInstance()->getStatsdDataFactory(); - $statsd->timing( $keyPrefix . 'runtime', $runtime ); - $statsd->timing( $keyPrefix . 'total_filters', $totalFilters ); - $statsd->timing( $keyPrefix . 'total_conditions', $totalConditions ); - } - /** * Executes a set of actions. * @@ -1374,10 +1205,9 @@ class AbuseFilterRunner { * @param string[] $filters The filters to check */ protected function checkEmergencyDisable( array $filters ) { - $stash = MediaWikiServices::getInstance()->getMainObjectStash(); // @ToDo this is an amount between 1 and AbuseFilterProfileActionsCap, which means that the // reliability of this number may strongly vary. We should instead use a fixed one. - $groupProfile = $stash->get( AbuseFilter::filterProfileGroupKey( $this->group ) ); + $groupProfile = $this->filterProfiler->getGroupProfile( $this->group ); $totalActions = $groupProfile['total']; foreach ( $filters as $filter ) { @@ -1385,8 +1215,8 @@ class AbuseFilterRunner { $hitCountLimit = AbuseFilter::getEmergencyValue( 'count', $this->group ); $maxAge = AbuseFilter::getEmergencyValue( 'age', $this->group ); - $filterProfile = $stash->get( AbuseFilter::filterProfileKey( $filter ) ); - $matchCount = $filterProfile['matches'] ?? 1; + $filterProfile = $this->filterProfiler->getFilterProfile( $filter ); + $matchCount = ( $filterProfile['matches'] ?? 0 ) + 1; // Figure out if the filter is subject to being throttled. $filterAge = (int)wfTimestamp( TS_UNIX, AbuseFilter::getFilter( $filter )->af_timestamp ); diff --git a/includes/AbuseFilterServices.php b/includes/AbuseFilterServices.php index a028c5aab..b641df157 100644 --- a/includes/AbuseFilterServices.php +++ b/includes/AbuseFilterServices.php @@ -12,4 +12,12 @@ class AbuseFilterServices { public static function getKeywordsManager() : KeywordsManager { return MediaWikiServices::getInstance()->getService( KeywordsManager::SERVICE_NAME ); } + + /** + * Conveniency wrapper for strong typing + * @return FilterProfiler + */ + public static function getFilterProfiler() : FilterProfiler { + return MediaWikiServices::getInstance()->getService( FilterProfiler::SERVICE_NAME ); + } } diff --git a/includes/FilterProfiler.php b/includes/FilterProfiler.php new file mode 100644 index 000000000..3df19a0f0 --- /dev/null +++ b/includes/FilterProfiler.php @@ -0,0 +1,314 @@ +objectStash = $objectStash; + $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); + $this->options = $options; + $this->localWikiID = $localWikiID; + $this->statsd = $statsd; + $this->logger = $logger; + } + + /** + * @param int|string $filter + */ + public function resetFilterProfile( $filter ) : void { + $profileKey = $this->filterProfileKey( $filter ); + $this->objectStash->delete( $profileKey ); + } + + /** + * Retrieve per-filter statistics. + * + * @param string $filter + * @return array + */ + public function getFilterProfile( string $filter ) : array { + $profile = $this->objectStash->get( $this->filterProfileKey( $filter ) ); + + if ( $profile !== false ) { + $curCount = $profile['count']; + $curTotalTime = $profile['total-time']; + $curTotalConds = $profile['total-cond']; + } else { + return [ 0, 0, 0, 0 ]; + } + + // Return in milliseconds, rounded to 2dp + $avgTime = round( $curTotalTime / $curCount, 2 ); + $avgCond = round( $curTotalConds / $curCount, 1 ); + + return [ $curCount, $profile['matches'], $avgTime, $avgCond ]; + } + + /** + * Retrieve per-group statistics + * @param string $group + * @return array|false + * @phan-return array{total:int,overflow:int,matches:int}|false + */ + public function getGroupProfile( string $group ) { + return $this->objectStash->get( $this->filterProfileGroupKey( $group ) ); + } + + /** + * Record per-filter profiling data + * + * @param int $filter + * @param float $time Time taken, in milliseconds + * @param int $conds + * @param bool $matched + */ + private function recordProfilingResult( int $filter, float $time, int $conds, bool $matched ) : void { + // Defer updates to avoid massive (~1 second) edit time increases + DeferredUpdates::addCallableUpdate( function () use ( $filter, $time, $conds, $matched ) { + $profileKey = $this->filterProfileKey( $filter ); + $profile = $this->objectStash->get( $profileKey ); + + if ( $profile !== false ) { + // Number of observed executions of this filter + $profile['count']++; + if ( $matched ) { + // Number of observed matches of this filter + $profile['matches']++; + } + // Total time spent on this filter from all observed executions + $profile['total-time'] += $time; + // Total number of conditions for this filter from all executions + $profile['total-cond'] += $conds; + } else { + $profile = [ + 'count' => 1, + 'matches' => (int)$matched, + 'total-time' => $time, + 'total-cond' => $conds + ]; + } + // Note: It is important that all key information be stored together in a single + // memcache entry to avoid race conditions where competing Apache instances + // partially overwrite the stats. + $this->objectStash->set( $profileKey, $profile, 3600 ); + } ); + } + + /** + * Check if profiling data for all filters is lesser than the limit. If not, delete it and + * also delete per-filter profiling for all filters. Note that we don't need to reset it for + * disabled filters too, as their profiling data will be reset upon re-enabling anyway. + * + * @param string $group + * @param array $allFilters + */ + public function checkResetProfiling( string $group, array $allFilters ) : void { + $profileKey = $this->filterProfileGroupKey( $group ); + + $profile = $this->objectStash->get( $profileKey ); + $total = $profile['total'] ?? 0; + + if ( $total > $this->options->get( 'AbuseFilterProfileActionsCap' ) ) { + $this->objectStash->delete( $profileKey ); + foreach ( $allFilters as $filter ) { + $this->resetFilterProfile( $filter ); + } + } + } + + /** + * Update global statistics + * + * @param string $group + * @param int $condsUsed The amount of used conditions + * @param float $totalTime Time taken, in milliseconds + * @param bool $anyMatch Whether at least one filter matched the action + */ + public function recordStats( string $group, int $condsUsed, float $totalTime, bool $anyMatch ) : void { + $profileKey = $this->filterProfileGroupKey( $group ); + + // Note: All related data is stored in a single memcache entry and updated via merge() + // to avoid race conditions where partial updates on competing instances corrupt the data. + $this->objectStash->merge( + $profileKey, + function ( $cache, $key, $profile ) use ( $condsUsed, $totalTime, $anyMatch ) { + if ( $profile === false ) { + $profile = [ + // Total number of actions observed + 'total' => 0, + // Number of actions ending by exceeding condition limit + 'overflow' => 0, + // Total time of execution of all observed actions + 'total-time' => 0, + // Total number of conditions from all observed actions + 'total-cond' => 0, + // Total number of filters matched + 'matches' => 0 + ]; + } + + $profile['total']++; + $profile['total-time'] += $totalTime; + $profile['total-cond'] += $condsUsed; + + // Increment overflow counter, if our condition limit overflowed + if ( $condsUsed > $this->options->get( 'AbuseFilterConditionLimit' ) ) { + $profile['overflow']++; + } + + // Increment counter by 1 if there was at least one match + if ( $anyMatch ) { + $profile['matches']++; + } + + return $profile; + }, + self::STATS_STORAGE_PERIOD + ); + } + + /** + * Record runtime profiling data for all filters together + * + * @param int $totalFilters + * @param int $totalConditions + * @param float $runtime + */ + public function recordRuntimeProfilingResult( int $totalFilters, int $totalConditions, float $runtime ) : void { + $keyPrefix = 'abusefilter.runtime-profile.' . $this->localWikiID . '.'; + + $this->statsd->timing( $keyPrefix . 'runtime', $runtime ); + $this->statsd->timing( $keyPrefix . 'total_filters', $totalFilters ); + $this->statsd->timing( $keyPrefix . 'total_conditions', $totalConditions ); + } + + /** + * Record per-filter profiling, for all filters + * + * @param Title $title + * @param array $data Profiling data, as stored in $this->profilingData + * @phan-param array $data + */ + public function recordPerFilterProfiling( Title $title, array $data ) : void { + foreach ( $data as $filterName => $params ) { + list( $filterID, $global ) = AbuseFilter::splitGlobalName( $filterName ); + if ( !$global ) { + // @todo Maybe add a parameter to recordProfilingResult to record global filters + // data separately (in the foreign wiki) + $this->recordProfilingResult( + $filterID, + $params['time'], + $params['conds'], + $params['result'] + ); + } + + if ( $params['time'] > $this->options->get( 'AbuseFilterSlowFilterRuntimeLimit' ) ) { + $this->recordSlowFilter( $title, $filterName, $params['time'], $params['conds'], $params['result'] ); + } + } + } + + /** + * Logs slow filter's runtime data for later analysis + * + * @param Title $title + * @param string $filterId + * @param float $runtime + * @param int $totalConditions + * @param bool $matched + */ + private function recordSlowFilter( + Title $title, + string $filterId, + float $runtime, + int $totalConditions, + bool $matched + ) : void { + $this->logger->info( + 'Edit filter {filter_id} on {wiki} is taking longer than expected', + [ + 'wiki' => $this->localWikiID, + 'filter_id' => $filterId, + 'title' => $title->getPrefixedText(), + 'runtime' => $runtime, + 'matched' => $matched, + 'total_conditions' => $totalConditions + ] + ); + } + + /** + * Get the memcache access key used to store per-filter profiling data. + * + * @param string|int $filter + * @return string + */ + private function filterProfileKey( $filter ) : string { + return $this->objectStash->makeKey( 'abusefilter-profile', 'v3', $filter ); + } + + /** + * Memcache access key used to store overall profiling data for rule groups + * + * @param string $group + * @return string + */ + private function filterProfileGroupKey( string $group ) : string { + return $this->objectStash->makeKey( 'abusefilter-profile', 'group', $group ); + } +} diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php index 5901ca802..e9ca8ec57 100644 --- a/includes/ServiceWiring.php +++ b/includes/ServiceWiring.php @@ -1,7 +1,10 @@ getHookContainer() ) ); }, + FilterProfiler::SERVICE_NAME => function ( MediaWikiServices $services ): FilterProfiler { + return new FilterProfiler( + $services->getMainObjectStash(), + new ServiceOptions( + FilterProfiler::CONSTRUCTOR_OPTIONS, + $services->getMainConfig() + ), + WikiMap::getCurrentWikiDbDomain()->getId(), + $services->getStatsdDataFactory(), + LoggerFactory::getInstance( 'AbuseFilter' ) + ); + }, ]; diff --git a/includes/Views/AbuseFilterViewEdit.php b/includes/Views/AbuseFilterViewEdit.php index 10acf1809..cd4a144af 100644 --- a/includes/Views/AbuseFilterViewEdit.php +++ b/includes/Views/AbuseFilterViewEdit.php @@ -1,5 +1,6 @@ af_enabled ) { // Statistics list( $totalCount, $matchesCount, $avgTime, $avgCond ) = - AbuseFilter::getFilterProfile( $filter ); + AbuseFilterServices::getFilterProfiler()->getFilterProfile( $filter ); if ( $totalCount > 0 ) { $matchesPercent = round( 100 * $matchesCount / $totalCount, 2 ); diff --git a/includes/Views/AbuseFilterViewList.php b/includes/Views/AbuseFilterViewList.php index 1362ff23e..407895d38 100644 --- a/includes/Views/AbuseFilterViewList.php +++ b/includes/Views/AbuseFilterViewList.php @@ -1,6 +1,6 @@ getMainObjectStash(); + $filterProfiler = AbuseFilterServices::getFilterProfiler(); $totalCount = 0; $matchCount = 0; $overflowCount = 0; foreach ( $this->getConfig()->get( 'AbuseFilterValidGroups' ) as $group ) { - $profile = $stash->get( AbuseFilter::filterProfileGroupKey( $group ) ); + $profile = $filterProfiler->getGroupProfile( $group ); if ( $profile !== false ) { $totalCount += $profile[ 'total' ]; $overflowCount += $profile[ 'overflow' ]; diff --git a/tests/phpunit/AbuseFilterConsequencesTest.php b/tests/phpunit/AbuseFilterConsequencesTest.php index 82f6b4bcf..76d56f012 100644 --- a/tests/phpunit/AbuseFilterConsequencesTest.php +++ b/tests/phpunit/AbuseFilterConsequencesTest.php @@ -1,6 +1,7 @@ doAction( $actionParams ); MWTimestamp::setFakeTime( false ); - $stash = MediaWikiServices::getInstance()->getMainObjectStash(); + $profiler = AbuseFilterServices::getFilterProfiler(); // Global stats shown on the top of Special:AbuseFilter - $globalStats = $stash->get( AbuseFilter::filterProfileGroupKey( 'default' ) ); + $globalStats = $profiler->getGroupProfile( 'default' ); $actualGlobalStats = [ 'totalMatches' => $globalStats['matches'], 'totalActions' => $globalStats['total'], @@ -1822,7 +1823,7 @@ class AbuseFilterConsequencesTest extends MediaWikiTestCase { // Per-filter stats shown on the top of Special:AbuseFilter/xxx foreach ( $createIds as $id ) { - list( $totalActions, $matches, , $conds ) = AbuseFilter::getFilterProfile( $id ); + list( $totalActions, $matches, , $conds ) = $profiler->getFilterProfile( $id ); $actualStats = [ 'matches' => $matches, 'actions' => $totalActions,