2020-09-20 22:31:41 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace MediaWiki\Extension\AbuseFilter;
|
|
|
|
|
|
|
|
use BagOStuff;
|
|
|
|
use IBufferingStatsdDataFactory;
|
|
|
|
use MediaWiki\Config\ServiceOptions;
|
|
|
|
use Psr\Log\LoggerInterface;
|
|
|
|
use Title;
|
2022-06-23 01:13:09 +00:00
|
|
|
use Wikimedia\WRStats\LocalEntityKey;
|
|
|
|
use Wikimedia\WRStats\WRStatsFactory;
|
2020-09-20 22:31:41 +00:00
|
|
|
|
|
|
|
/**
 * This class is used to create, store, and retrieve profiling information for single filters and
 * groups of filters.
 *
 * Per-filter and per-group counters are written to and read from WRStats (under the
 * self::KEY_PREFIX prefix), overall runtime figures are sent to statsd, and filters that exceed
 * the configured runtime limit are reported through the logger.
 *
 * @internal
 */
class FilterProfiler {
	public const SERVICE_NAME = 'AbuseFilterFilterProfiler';

	public const CONSTRUCTOR_OPTIONS = [
		'AbuseFilterConditionLimit',
		'AbuseFilterSlowFilterRuntimeLimit',
	];

	/**
	 * How long to keep profiling data in cache (in seconds)
	 */
	private const STATS_STORAGE_PERIOD = BagOStuff::TTL_DAY;

	/** The stats time bucket size: one twelfth of the storage period */
	private const STATS_TIME_STEP = self::STATS_STORAGE_PERIOD / 12;

	/** The WRStats spec common to all metrics */
	private const STATS_TEMPLATE = [
		'sequences' => [ [
			'timeStep' => self::STATS_TIME_STEP,
			'expiry' => self::STATS_STORAGE_PERIOD,
		] ],
	];

	/** Prefix passed to every WRStats reader and writer created by this class */
	private const KEY_PREFIX = 'abusefilter-profile';

	/** @var WRStatsFactory */
	private $statsFactory;

	/** @var ServiceOptions */
	private $options;

	/** @var string */
	private $localWikiID;

	/** @var IBufferingStatsdDataFactory */
	private $statsd;

	/** @var LoggerInterface */
	private $logger;

	/** @var array WRStats metric specs, keyed by metric name */
	private $statsSpecs;

	/**
	 * @param WRStatsFactory $statsFactory
	 * @param ServiceOptions $options Must provide every option in self::CONSTRUCTOR_OPTIONS
	 * @param string $localWikiID
	 * @param IBufferingStatsdDataFactory $statsd
	 * @param LoggerInterface $logger
	 */
	public function __construct(
		WRStatsFactory $statsFactory,
		ServiceOptions $options,
		string $localWikiID,
		IBufferingStatsdDataFactory $statsd,
		LoggerInterface $logger
	) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->statsFactory = $statsFactory;
		$this->options = $options;
		$this->localWikiID = $localWikiID;
		$this->statsd = $statsd;
		$this->logger = $logger;
		$this->statsSpecs = [
			'count' => self::STATS_TEMPLATE,
			'total' => self::STATS_TEMPLATE,
			'overflow' => self::STATS_TEMPLATE,
			'matches' => self::STATS_TEMPLATE,
			// Times are recorded in milliseconds and are floats, hence the finer resolution.
			// NOTE(review): presumably this keeps three decimal places; confirm against WRStats.
			'total-time' => [ 'resolution' => 1e-3 ] + self::STATS_TEMPLATE,
			'total-cond' => self::STATS_TEMPLATE
		];
	}

	/**
	 * Discard all the profiling data stored for the given filter.
	 *
	 * @param int $filter
	 */
	public function resetFilterProfile( int $filter ): void {
		$this->newStatsWriter()->resetAll( [ $this->filterProfileKey( $filter ) ] );
	}

	/**
	 * Retrieve per-filter statistics.
	 *
	 * @param int $filter
	 * @return array Totals over the last self::STATS_STORAGE_PERIOD seconds, keyed by metric
	 *   name: 'count', 'matches', 'total-time' and 'total-cond'
	 * @phan-return array{count:int,matches:int,total-time:float,total-cond:int}
	 */
	public function getFilterProfile( int $filter ): array {
		$reader = $this->newStatsReader();
		return $reader->total( $reader->getRates(
			[ 'count', 'matches', 'total-time', 'total-cond' ],
			$this->filterProfileKey( $filter ),
			$reader->latest( self::STATS_STORAGE_PERIOD )
		) );
	}

	/**
	 * Retrieve per-group statistics.
	 *
	 * @param string $group
	 * @return array Totals over the last self::STATS_STORAGE_PERIOD seconds, keyed by metric
	 *   name: 'total', 'overflow', 'total-time', 'total-cond' and 'matches'
	 * @phan-return array{total:int,overflow:int,total-time:float,total-cond:int,matches:int}
	 */
	public function getGroupProfile( string $group ): array {
		$reader = $this->newStatsReader();
		return $reader->total( $reader->getRates(
			[ 'total', 'overflow', 'total-time', 'total-cond', 'matches' ],
			$this->filterProfileGroupKey( $group ),
			$reader->latest( self::STATS_STORAGE_PERIOD )
		) );
	}

	/**
	 * Record per-filter profiling data
	 *
	 * @param int $filter
	 * @param float $time Time taken, in milliseconds
	 * @param int $conds
	 * @param bool $matched
	 */
	private function recordProfilingResult( int $filter, float $time, int $conds, bool $matched ): void {
		$key = $this->filterProfileKey( $filter );
		$writer = $this->newStatsWriter();

		$writer->incr( 'count', $key );
		if ( $matched ) {
			$writer->incr( 'matches', $key );
		}
		$writer->incr( 'total-time', $key, $time );
		$writer->incr( 'total-cond', $key, $conds );
		$writer->flush();
	}

	/**
	 * Update global statistics
	 *
	 * @param string $group
	 * @param int $condsUsed The amount of used conditions
	 * @param float $totalTime Time taken, in milliseconds
	 * @param bool $anyMatch Whether at least one filter matched the action
	 */
	public function recordStats( string $group, int $condsUsed, float $totalTime, bool $anyMatch ): void {
		$writer = $this->newStatsWriter();
		$key = $this->filterProfileGroupKey( $group );

		$writer->incr( 'total', $key );
		$writer->incr( 'total-time', $key, $totalTime );
		$writer->incr( 'total-cond', $key, $condsUsed );

		// Increment overflow counter, if our condition limit overflowed
		if ( $condsUsed > $this->options->get( 'AbuseFilterConditionLimit' ) ) {
			$writer->incr( 'overflow', $key );
		}

		// Increment counter by 1 if there was at least one match
		if ( $anyMatch ) {
			$writer->incr( 'matches', $key );
		}
		$writer->flush();
	}

	/**
	 * Record runtime profiling data for all filters together
	 *
	 * All three values are sent to statsd under the
	 * "abusefilter.runtime-profile.<wiki ID>." key prefix.
	 *
	 * @param int $totalFilters
	 * @param int $totalConditions
	 * @param float $runtime
	 * @codeCoverageIgnore
	 */
	public function recordRuntimeProfilingResult( int $totalFilters, int $totalConditions, float $runtime ): void {
		$keyPrefix = 'abusefilter.runtime-profile.' . $this->localWikiID . '.';

		$this->statsd->timing( $keyPrefix . 'runtime', $runtime );
		// NOTE(review): the two values below are counts, yet they are reported with timing();
		// presumably this is deliberate so statsd aggregates them as distributions - confirm
		// before changing, since switching the metric type would affect existing consumers.
		$this->statsd->timing( $keyPrefix . 'total_filters', $totalFilters );
		$this->statsd->timing( $keyPrefix . 'total_conditions', $totalConditions );
	}

	/**
	 * Record per-filter profiling, for all filters
	 *
	 * Profiling data is only stored for non-global filters; every filter (global or not) whose
	 * runtime exceeds AbuseFilterSlowFilterRuntimeLimit is additionally logged as slow.
	 *
	 * @param Title $title
	 * @param array $data Profiling data, keyed by filter name
	 * @phan-param array<string,array{time:float,conds:int,result:bool}> $data
	 */
	public function recordPerFilterProfiling( Title $title, array $data ): void {
		$slowFilterThreshold = $this->options->get( 'AbuseFilterSlowFilterRuntimeLimit' );

		foreach ( $data as $filterName => $params ) {
			// PHP turns numeric-string array keys (e.g. '12') into integers, so restore the
			// string form instead of relying on implicit coercion in the string-typed callees.
			$filterName = (string)$filterName;
			[ $filterID, $global ] = GlobalNameUtils::splitGlobalName( $filterName );

			// @todo Maybe add a parameter to recordProfilingResult to record global filters
			// data separately (in the foreign wiki)
			if ( !$global ) {
				$this->recordProfilingResult(
					$filterID,
					$params['time'],
					$params['conds'],
					$params['result']
				);
			}

			if ( $params['time'] > $slowFilterThreshold ) {
				$this->recordSlowFilter(
					$title,
					$filterName,
					$params['time'],
					$params['conds'],
					$params['result'],
					$global
				);
			}
		}
	}

	/**
	 * Logs slow filter's runtime data for later analysis
	 *
	 * @param Title $title
	 * @param string $filterId
	 * @param float $runtime
	 * @param int $totalConditions
	 * @param bool $matched
	 * @param bool $global
	 */
	private function recordSlowFilter(
		Title $title,
		string $filterId,
		float $runtime,
		int $totalConditions,
		bool $matched,
		bool $global
	): void {
		$this->logger->info(
			'Edit filter {filter_id} on {wiki} is taking longer than expected',
			[
				'wiki' => $this->localWikiID,
				'filter_id' => $filterId,
				'title' => $title->getPrefixedText(),
				'runtime' => $runtime,
				'matched' => $matched,
				'total_conditions' => $totalConditions,
				'global' => $global
			]
		);
	}

	/**
	 * Create a WRStats writer bound to this class's metric specs and key prefix.
	 *
	 * @return \Wikimedia\WRStats\WRStatsWriter
	 */
	private function newStatsWriter() {
		return $this->statsFactory->createWriter( $this->statsSpecs, self::KEY_PREFIX );
	}

	/**
	 * Create a WRStats reader bound to this class's metric specs and key prefix.
	 *
	 * @return \Wikimedia\WRStats\WRStatsReader
	 */
	private function newStatsReader() {
		return $this->statsFactory->createReader( $this->statsSpecs, self::KEY_PREFIX );
	}

	/**
	 * Get the WRStats entity key used to store per-filter profiling data.
	 *
	 * @param int $filter
	 * @return LocalEntityKey
	 */
	private function filterProfileKey( int $filter ): LocalEntityKey {
		return new LocalEntityKey( [ 'filter', (string)$filter ] );
	}

	/**
	 * WRStats entity key used to store overall profiling data for rule groups
	 *
	 * @param string $group
	 * @return LocalEntityKey
	 */
	private function filterProfileGroupKey( string $group ): LocalEntityKey {
		return new LocalEntityKey( [ 'group', $group ] );
	}
}
|