mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-09-23 10:18:22 +00:00
Merge "FilterProfiler: use WRStats"
This commit is contained in:
commit
c3c70f7fa0
|
@ -479,14 +479,6 @@
|
|||
"value": 500,
|
||||
"description": "Runtime in milliseconds before a filter is considered slow."
|
||||
},
|
||||
"AbuseFilterProfileActionsCap": {
|
||||
"value": 10000,
|
||||
"description": "Number of action that determines when to reset profiling stats."
|
||||
},
|
||||
"AbuseFilterProfilerCache": {
|
||||
"value": null,
|
||||
"description": "The key in $wgObjectCaches to use for a profiler, or null to use the main stash"
|
||||
},
|
||||
"AbuseFilterRangeBlockSize": {
|
||||
"value": {
|
||||
"IPv4": 16,
|
||||
|
|
|
@ -7,6 +7,8 @@ use IBufferingStatsdDataFactory;
|
|||
use MediaWiki\Config\ServiceOptions;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Title;
|
||||
use Wikimedia\WRStats\LocalEntityKey;
|
||||
use Wikimedia\WRStats\WRStatsFactory;
|
||||
|
||||
/**
|
||||
* This class is used to create, store, and retrieve profiling information for single filters and
|
||||
|
@ -17,42 +19,30 @@ class FilterProfiler {
|
|||
public const SERVICE_NAME = 'AbuseFilterFilterProfiler';
|
||||
|
||||
public const CONSTRUCTOR_OPTIONS = [
|
||||
'AbuseFilterProfileActionsCap',
|
||||
'AbuseFilterConditionLimit',
|
||||
'AbuseFilterSlowFilterRuntimeLimit',
|
||||
];
|
||||
|
||||
private const NULL_FILTER_PROFILE = [
|
||||
// Number of observed executions of this filter
|
||||
'count' => 0,
|
||||
// Number of observed matches of this filter
|
||||
'matches' => 0,
|
||||
// Total time spent on this filter from all observed executions (in milliseconds)
|
||||
'total-time' => 0.0,
|
||||
// Total number of conditions for this filter from all executions
|
||||
'total-cond' => 0,
|
||||
];
|
||||
|
||||
private const NULL_GROUP_PROFILE = [
|
||||
// Total number of actions observed
|
||||
'total' => 0,
|
||||
// Number of actions ending by exceeding condition limit
|
||||
'overflow' => 0,
|
||||
// Total time of execution of all observed actions (in milliseconds)
|
||||
'total-time' => 0.0,
|
||||
// Total number of conditions from all observed actions
|
||||
'total-cond' => 0,
|
||||
// Total number of filters matched
|
||||
'matches' => 0,
|
||||
];
|
||||
|
||||
/**
|
||||
* @var int How long to keep profiling data in cache (in seconds)
|
||||
* How long to keep profiling data in cache (in seconds)
|
||||
*/
|
||||
private const STATS_STORAGE_PERIOD = BagOStuff::TTL_DAY;
|
||||
|
||||
/** @var BagOStuff */
|
||||
private $objectStash;
|
||||
/** The stats time bucket size */
|
||||
private const STATS_TIME_STEP = self::STATS_STORAGE_PERIOD / 12;
|
||||
|
||||
/** The WRStats spec common to all metrics */
|
||||
private const STATS_TEMPLATE = [
|
||||
'sequences' => [ [
|
||||
'timeStep' => self::STATS_TIME_STEP,
|
||||
'expiry' => self::STATS_STORAGE_PERIOD,
|
||||
] ],
|
||||
];
|
||||
|
||||
private const KEY_PREFIX = 'abusefilter-profile';
|
||||
|
||||
/** @var WRStatsFactory */
|
||||
private $statsFactory;
|
||||
|
||||
/** @var ServiceOptions */
|
||||
private $options;
|
||||
|
@ -66,34 +56,48 @@ class FilterProfiler {
|
|||
/** @var LoggerInterface */
|
||||
private $logger;
|
||||
|
||||
/** @var array */
|
||||
private $statsSpecs;
|
||||
|
||||
/**
|
||||
* @param BagOStuff $objectStash
|
||||
* @param WRStatsFactory $statsFactory
|
||||
* @param ServiceOptions $options
|
||||
* @param string $localWikiID
|
||||
* @param IBufferingStatsdDataFactory $statsd
|
||||
* @param LoggerInterface $logger
|
||||
*/
|
||||
public function __construct(
|
||||
BagOStuff $objectStash,
|
||||
WRStatsFactory $statsFactory,
|
||||
ServiceOptions $options,
|
||||
string $localWikiID,
|
||||
IBufferingStatsdDataFactory $statsd,
|
||||
LoggerInterface $logger
|
||||
) {
|
||||
$this->objectStash = $objectStash;
|
||||
$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
|
||||
$this->statsFactory = $statsFactory;
|
||||
$this->options = $options;
|
||||
$this->localWikiID = $localWikiID;
|
||||
$this->statsd = $statsd;
|
||||
$this->logger = $logger;
|
||||
$this->statsSpecs = [
|
||||
'count' => self::STATS_TEMPLATE,
|
||||
'total' => self::STATS_TEMPLATE,
|
||||
'overflow' => self::STATS_TEMPLATE,
|
||||
'matches' => self::STATS_TEMPLATE,
|
||||
'total-time' => [ 'resolution' => 1e-3 ] + self::STATS_TEMPLATE,
|
||||
'total-cond' => self::STATS_TEMPLATE
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param int $filter
|
||||
*/
|
||||
public function resetFilterProfile( int $filter ): void {
|
||||
$profileKey = $this->filterProfileKey( $filter );
|
||||
$this->objectStash->delete( $profileKey );
|
||||
$writer = $this->statsFactory->createWriter(
|
||||
$this->statsSpecs,
|
||||
self::KEY_PREFIX
|
||||
);
|
||||
$writer->resetAll( [ $this->filterProfileKey( $filter ) ] );
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -104,8 +108,15 @@ class FilterProfiler {
|
|||
* @phan-return array{count:int,matches:int,total-time:float,total-cond:int}
|
||||
*/
|
||||
public function getFilterProfile( int $filter ): array {
|
||||
return $this->objectStash->get( $this->filterProfileKey( $filter ) )
|
||||
?: self::NULL_FILTER_PROFILE;
|
||||
$reader = $this->statsFactory->createReader(
|
||||
$this->statsSpecs,
|
||||
self::KEY_PREFIX
|
||||
);
|
||||
return $reader->total( $reader->getRates(
|
||||
[ 'count', 'matches', 'total-time', 'total-cond' ],
|
||||
$this->filterProfileKey( $filter ),
|
||||
$reader->latest( self::STATS_STORAGE_PERIOD )
|
||||
) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -116,8 +127,15 @@ class FilterProfiler {
|
|||
* @phan-return array{total:int,overflow:int,total-time:float,total-cond:int,matches:int}
|
||||
*/
|
||||
public function getGroupProfile( string $group ): array {
|
||||
return $this->objectStash->get( $this->filterProfileGroupKey( $group ) )
|
||||
?: self::NULL_GROUP_PROFILE;
|
||||
$reader = $this->statsFactory->createReader(
|
||||
$this->statsSpecs,
|
||||
self::KEY_PREFIX
|
||||
);
|
||||
return $reader->total( $reader->getRates(
|
||||
[ 'total', 'overflow', 'total-time', 'total-cond', 'matches' ],
|
||||
$this->filterProfileGroupKey( $group ),
|
||||
$reader->latest( self::STATS_STORAGE_PERIOD )
|
||||
) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -129,52 +147,18 @@ class FilterProfiler {
|
|||
* @param bool $matched
|
||||
*/
|
||||
private function recordProfilingResult( int $filter, float $time, int $conds, bool $matched ): void {
|
||||
// Note: It is important that all key information be stored together in a single
|
||||
// memcache entry to avoid race conditions where competing Apache instances
|
||||
// partially overwrite the stats.
|
||||
$profileKey = $this->filterProfileKey( $filter );
|
||||
$this->objectStash->merge(
|
||||
$profileKey,
|
||||
static function ( $cache, $key, $profile ) use ( $time, $conds, $matched ) {
|
||||
if ( $profile === false ) {
|
||||
$profile = self::NULL_FILTER_PROFILE;
|
||||
}
|
||||
|
||||
$profile['count']++;
|
||||
if ( $matched ) {
|
||||
$profile['matches']++;
|
||||
}
|
||||
$profile['total-time'] += $time;
|
||||
$profile['total-cond'] += $conds;
|
||||
|
||||
return $profile;
|
||||
},
|
||||
BagOStuff::TTL_HOUR
|
||||
$key = $this->filterProfileKey( $filter );
|
||||
$writer = $this->statsFactory->createWriter(
|
||||
$this->statsSpecs,
|
||||
self::KEY_PREFIX
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if profiling data for all filters is lesser than the limit. If not, delete it and
|
||||
* also delete per-filter profiling for all filters. Note that we don't need to reset it for
|
||||
* disabled filters too, as their profiling data will be reset upon re-enabling anyway.
|
||||
*
|
||||
* @param string $group
|
||||
* @param string[] $allFilters
|
||||
*/
|
||||
public function checkResetProfiling( string $group, array $allFilters ): void {
|
||||
$profile = $this->getGroupProfile( $group );
|
||||
$total = $profile['total'];
|
||||
|
||||
if ( $total > $this->options->get( 'AbuseFilterProfileActionsCap' ) ) {
|
||||
$profileKey = $this->filterProfileGroupKey( $group );
|
||||
$this->objectStash->delete( $profileKey );
|
||||
foreach ( $allFilters as $filter ) {
|
||||
list( $filterID, $global ) = GlobalNameUtils::splitGlobalName( $filter );
|
||||
if ( $global === false ) {
|
||||
$this->resetFilterProfile( $filterID );
|
||||
}
|
||||
}
|
||||
$writer->incr( 'count', $key );
|
||||
if ( $matched ) {
|
||||
$writer->incr( 'matches', $key );
|
||||
}
|
||||
$writer->incr( 'total-time', $key, $time );
|
||||
$writer->incr( 'total-cond', $key, $conds );
|
||||
$writer->flush();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -186,35 +170,26 @@ class FilterProfiler {
|
|||
* @param bool $anyMatch Whether at least one filter matched the action
|
||||
*/
|
||||
public function recordStats( string $group, int $condsUsed, float $totalTime, bool $anyMatch ): void {
|
||||
$profileKey = $this->filterProfileGroupKey( $group );
|
||||
|
||||
// Note: All related data is stored in a single memcache entry and updated via merge()
|
||||
// to avoid race conditions where partial updates on competing instances corrupt the data.
|
||||
$this->objectStash->merge(
|
||||
$profileKey,
|
||||
function ( $cache, $key, $profile ) use ( $condsUsed, $totalTime, $anyMatch ) {
|
||||
if ( $profile === false ) {
|
||||
$profile = self::NULL_GROUP_PROFILE;
|
||||
}
|
||||
|
||||
$profile['total']++;
|
||||
$profile['total-time'] += $totalTime;
|
||||
$profile['total-cond'] += $condsUsed;
|
||||
|
||||
// Increment overflow counter, if our condition limit overflowed
|
||||
if ( $condsUsed > $this->options->get( 'AbuseFilterConditionLimit' ) ) {
|
||||
$profile['overflow']++;
|
||||
}
|
||||
|
||||
// Increment counter by 1 if there was at least one match
|
||||
if ( $anyMatch ) {
|
||||
$profile['matches']++;
|
||||
}
|
||||
|
||||
return $profile;
|
||||
},
|
||||
self::STATS_STORAGE_PERIOD
|
||||
$writer = $this->statsFactory->createWriter(
|
||||
$this->statsSpecs,
|
||||
self::KEY_PREFIX
|
||||
);
|
||||
$key = $this->filterProfileGroupKey( $group );
|
||||
|
||||
$writer->incr( 'total', $key );
|
||||
$writer->incr( 'total-time', $key, $totalTime );
|
||||
$writer->incr( 'total-cond', $key, $condsUsed );
|
||||
|
||||
// Increment overflow counter, if our condition limit overflowed
|
||||
if ( $condsUsed > $this->options->get( 'AbuseFilterConditionLimit' ) ) {
|
||||
$writer->incr( 'overflow', $key );
|
||||
}
|
||||
|
||||
// Increment counter by 1 if there was at least one match
|
||||
if ( $anyMatch ) {
|
||||
$writer->incr( 'matches', $key );
|
||||
}
|
||||
$writer->flush();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -302,22 +277,22 @@ class FilterProfiler {
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the memcache access key used to store per-filter profiling data.
|
||||
* Get the WRStats entity key used to store per-filter profiling data.
|
||||
*
|
||||
* @param int $filter
|
||||
* @return string
|
||||
* @return LocalEntityKey
|
||||
*/
|
||||
private function filterProfileKey( int $filter ): string {
|
||||
return $this->objectStash->makeKey( 'abusefilter-profile', 'v3', $filter );
|
||||
private function filterProfileKey( int $filter ): LocalEntityKey {
|
||||
return new LocalEntityKey( [ 'filter', (string)$filter ] );
|
||||
}
|
||||
|
||||
/**
|
||||
* Memcache access key used to store overall profiling data for rule groups
|
||||
* WRStats entity key used to store overall profiling data for rule groups
|
||||
*
|
||||
* @param string $group
|
||||
* @return string
|
||||
* @return LocalEntityKey
|
||||
*/
|
||||
private function filterProfileGroupKey( string $group ): string {
|
||||
return $this->objectStash->makeKey( 'abusefilter-profile', 'group', $group );
|
||||
private function filterProfileGroupKey( string $group ): LocalEntityKey {
|
||||
return new LocalEntityKey( [ 'group', $group ] );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -381,7 +381,6 @@ class FilterRunner {
|
|||
protected function profileExecution( RunnerData $data ) {
|
||||
$allFilters = $data->getAllFilters();
|
||||
$matchedFilters = $data->getMatchedFilters();
|
||||
$this->filterProfiler->checkResetProfiling( $this->group, $allFilters );
|
||||
$this->filterProfiler->recordRuntimeProfilingResult(
|
||||
count( $allFilters ),
|
||||
$data->getTotalConditions(),
|
||||
|
|
|
@ -56,14 +56,8 @@ return [
|
|||
return new KeywordsManager( $services->get( AbuseFilterHookRunner::SERVICE_NAME ) );
|
||||
},
|
||||
FilterProfiler::SERVICE_NAME => static function ( MediaWikiServices $services ): FilterProfiler {
|
||||
$cacheType = $services->getMainConfig()->get( 'AbuseFilterProfilerCache' );
|
||||
if ( $cacheType !== null ) {
|
||||
$cache = ObjectCache::getInstance( $cacheType );
|
||||
} else {
|
||||
$cache = $services->getMainObjectStash();
|
||||
}
|
||||
return new FilterProfiler(
|
||||
$cache,
|
||||
$services->getWRStatsFactory(),
|
||||
new ServiceOptions(
|
||||
FilterProfiler::CONSTRUCTOR_OPTIONS,
|
||||
$services->getMainConfig()
|
||||
|
|
|
@ -11,6 +11,8 @@ use Psr\Log\LoggerInterface;
|
|||
use Psr\Log\NullLogger;
|
||||
use TestLogger;
|
||||
use Title;
|
||||
use Wikimedia\WRStats\BagOStuffStatsStore;
|
||||
use Wikimedia\WRStats\WRStatsFactory;
|
||||
|
||||
/**
|
||||
* @coversDefaultClass \MediaWiki\Extension\AbuseFilter\FilterProfiler
|
||||
|
@ -36,13 +38,12 @@ class FilterProfilerTest extends MediaWikiUnitTestCase {
|
|||
private function getFilterProfiler( array $options = null, LoggerInterface $logger = null ): FilterProfiler {
|
||||
if ( $options === null ) {
|
||||
$options = [
|
||||
'AbuseFilterProfileActionsCap' => 10000,
|
||||
'AbuseFilterConditionLimit' => 1000,
|
||||
'AbuseFilterSlowFilterRuntimeLimit' => 500,
|
||||
];
|
||||
}
|
||||
return new FilterProfiler(
|
||||
new HashBagOStuff(),
|
||||
new WRStatsFactory( new BagOStuffStatsStore( new HashBagOStuff() ) ),
|
||||
new ServiceOptions( FilterProfiler::CONSTRUCTOR_OPTIONS, $options ),
|
||||
'wiki',
|
||||
$this->createMock( IBufferingStatsdDataFactory::class ),
|
||||
|
@ -266,47 +267,4 @@ class FilterProfilerTest extends MediaWikiUnitTestCase {
|
|||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @covers ::checkResetProfiling
|
||||
* @covers ::filterProfileGroupKey
|
||||
*/
|
||||
public function testCheckResetProfiling() {
|
||||
$profiler = $this->getFilterProfiler( [
|
||||
'AbuseFilterProfileActionsCap' => 1,
|
||||
'AbuseFilterConditionLimit' => 1000,
|
||||
'AbuseFilterSlowFilterRuntimeLimit' => 500,
|
||||
] );
|
||||
|
||||
$profiler->recordPerFilterProfiling(
|
||||
$this->createMock( Title::class ),
|
||||
[
|
||||
'1' => [
|
||||
'time' => 12.5,
|
||||
'conds' => 5,
|
||||
'result' => false
|
||||
],
|
||||
'2' => [
|
||||
'time' => 34.5,
|
||||
'conds' => 3,
|
||||
'result' => true
|
||||
],
|
||||
'3' => [
|
||||
'time' => 34.5,
|
||||
'conds' => 5,
|
||||
'result' => true
|
||||
],
|
||||
]
|
||||
);
|
||||
|
||||
$profiler->recordStats( 'default', 100, 256.5, true );
|
||||
$profiler->recordStats( 'default', 200, 512.5, false );
|
||||
|
||||
$profiler->checkResetProfiling( 'default', [ '1', '2' ] );
|
||||
|
||||
$this->assertSame( self::NULL_GROUP_PROFILE, $profiler->getGroupProfile( 'default' ) );
|
||||
$this->assertSame( self::NULL_FILTER_PROFILE, $profiler->getFilterProfile( 1 ) );
|
||||
$this->assertSame( self::NULL_FILTER_PROFILE, $profiler->getFilterProfile( 2 ) );
|
||||
$this->assertNotSame( self::NULL_FILTER_PROFILE, $profiler->getFilterProfile( 3 ) );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue