[ 'time' => float, 'conds' => int, 'result' => bool ] ] * @phan-var array * * Where 'timeTaken' is in seconds, 'result' is a boolean indicating whether the filter matched * the action, and 'filterID' is "{prefix}-{ID}" ; Prefix should be empty for local * filters. In stash mode this member is saved in cache, while in execute mode it's used to * update profiling after checking all filters. */ protected $profilingData; /** * @param AbuseFilterHookRunner $hookRunner * @param FilterProfiler $filterProfiler * @param ChangeTagger $changeTagger * @param FilterLookup $filterLookup * @param ParserFactory $parserFactory * @param ConsequencesExecutorFactory $consExecutorFactory * @param AbuseLoggerFactory $abuseLoggerFactory * @param Watcher[] $watchers * @param LoggerInterface $logger * @param IBufferingStatsdDataFactory $statsdDataFactory * @param array $validFilterGroups * @param User $user * @param Title $title * @param AbuseFilterVariableHolder $vars * @param string $group * @throws InvalidArgumentException If $group is invalid or the 'action' variable is unset */ public function __construct( AbuseFilterHookRunner $hookRunner, FilterProfiler $filterProfiler, ChangeTagger $changeTagger, FilterLookup $filterLookup, ParserFactory $parserFactory, ConsequencesExecutorFactory $consExecutorFactory, AbuseLoggerFactory $abuseLoggerFactory, array $watchers, LoggerInterface $logger, IBufferingStatsdDataFactory $statsdDataFactory, array $validFilterGroups, User $user, Title $title, AbuseFilterVariableHolder $vars, string $group ) { $this->hookRunner = $hookRunner; $this->filterProfiler = $filterProfiler; $this->changeTagger = $changeTagger; $this->filterLookup = $filterLookup; $this->parserFactory = $parserFactory; $this->consExecutorFactory = $consExecutorFactory; $this->abuseLoggerFactory = $abuseLoggerFactory; $this->watchers = $watchers; $this->logger = $logger; $this->statsdDataFactory = $statsdDataFactory; if ( !in_array( $group, $validFilterGroups, true ) ) { throw new InvalidArgumentException( "Group $group is not a valid group" ); } if ( !$vars->varIsSet( 'action' ) ) { throw new InvalidArgumentException( "The 'action' variable is not set." ); } $this->user = $user; $this->title = $title; $this->vars = $vars; $this->vars->setLogger( $logger ); $this->group = $group; $this->action = $vars->getVar( 'action' )->toString(); } /** * Inits variables and parser right before running */ private function init() { // Add vars from extensions $this->hookRunner->onAbuseFilterFilterAction( $this->vars, $this->title ); $this->hookRunner->onAbuseFilterAlterVariables( $this->vars, $this->title, $this->user ); $generator = new VariableGenerator( $this->vars ); $this->vars = $generator->addGenericVars()->getVariableHolder(); $this->vars->forFilter = true; $this->vars->setVar( 'timestamp', (int)wfTimestamp( TS_UNIX ) ); $this->parser = $this->parserFactory->newParser( $this->vars ); $this->parser->setStatsd( $this->statsdDataFactory ); $this->profilingData = []; } /** * The main entry point of this class. This method runs all filters and takes their consequences. * * @param bool $allowStash Whether we are allowed to check the cache to see if there's a cached * result of a previous execution for the same edit. * @throws BadMethodCallException If run() was already called on this instance * @return Status Good if no action has been taken, a fatal otherwise. */ public function run( $allowStash = true ): Status { $this->init(); $skipReasons = []; $shouldFilter = $this->hookRunner->onAbuseFilterShouldFilterAction( $this->vars, $this->title, $this->user, $skipReasons ); if ( !$shouldFilter ) { $this->logger->info( 'Skipping action {action}. Reasons provided: {reasons}', [ 'action' => $this->action, 'reasons' => implode( ', ', $skipReasons ) ] ); return Status::newGood(); } $useStash = $allowStash && $this->action === 'edit'; $fromCache = false; $result = []; if ( $useStash ) { $cacheData = $this->seekCache(); if ( $cacheData !== false ) { // Use cached vars (T176291) and profiling data (T191430) $this->vars = AbuseFilterVariableHolder::newFromArray( $cacheData['vars'] ); $result = [ 'hitCondLimit' => $cacheData['hitCondLimit'], 'matches' => $cacheData['matches'], 'runtime' => $cacheData['runtime'], 'condCount' => $cacheData['condCount'], 'profiling' => $cacheData['profiling'] ]; $fromCache = true; } } if ( !$fromCache ) { $startTime = microtime( true ); // Ensure there's no extra time leftover AFComputedVariable::$profilingExtraTime = 0; $hitCondLimit = false; // This also updates $this->profilingData and $this->parser->mCondCount used later $matches = $this->checkAllFilters( $hitCondLimit ); $timeTaken = ( microtime( true ) - $startTime - AFComputedVariable::$profilingExtraTime ) * 1000; $result = [ 'hitCondLimit' => $hitCondLimit, 'matches' => $matches, 'runtime' => $timeTaken, 'condCount' => $this->parser->getCondCount(), 'profiling' => $this->profilingData ]; } '@phan-var array{hitCondLimit:bool,matches:array,runtime:int,condCount:int,profiling:array} $result'; $matchedFilters = array_keys( array_filter( $result['matches'] ) ); $allFilters = array_keys( $result['matches'] ); $this->profileExecution( $result, $matchedFilters, $allFilters ); if ( $result['hitCondLimit'] ) { $this->changeTagger->addConditionsLimitTag( $this->getSpecsForTagger() ); } if ( count( $matchedFilters ) === 0 ) { return Status::newGood(); } $executor = $this->consExecutorFactory->newExecutor( $this->user, $this->title, $this->vars ); $status = $executor->executeFilterActions( $matchedFilters ); $actionsTaken = $status->getValue(); // Note, it's important that we create an AbuseLogger now, after all lazy-loaded variables // requested by active filters have been computed $abuseLogger = $this->abuseLoggerFactory->newLogger( $this->title, $this->user, $this->vars ); [ 'local' => $loggedLocalFilters, 'global' => $loggedGlobalFilters ] = $abuseLogger->addLogEntries( $actionsTaken ); foreach ( $this->watchers as $watcher ) { $watcher->run( $loggedLocalFilters, $loggedGlobalFilters, $this->group ); } return $status; } /** * Similar to run(), but runs in "stash" mode, which means filters are executed, no actions are * taken, and the result is saved in cache to be later reused. This can only be used for edits, * and not doing so will throw. * * @throws InvalidArgumentException * @return Status Always a good status, since we're only saving data. */ public function runForStash() : Status { if ( $this->action !== 'edit' ) { throw new InvalidArgumentException( __METHOD__ . " can only be called for edits, called for action {$this->action}." ); } $this->init(); $skipReasons = []; $shouldFilter = $this->hookRunner->onAbuseFilterShouldFilterAction( $this->vars, $this->title, $this->user, $skipReasons ); if ( !$shouldFilter ) { // Don't log it yet return Status::newGood(); } $cache = ObjectCache::getLocalClusterInstance(); $stashKey = $this->getStashKey( $cache ); $startTime = microtime( true ); // Ensure there's no extra time leftover AFComputedVariable::$profilingExtraTime = 0; $hitCondLimit = false; $matchedFilters = $this->checkAllFilters( $hitCondLimit ); // Save the filter stash result and do nothing further $cacheData = [ 'matches' => $matchedFilters, 'hitCondLimit' => $hitCondLimit, 'condCount' => $this->parser->getCondCount(), 'runtime' => ( microtime( true ) - $startTime - AFComputedVariable::$profilingExtraTime ) * 1000, 'vars' => $this->vars->dumpAllVars(), 'profiling' => $this->profilingData ]; $cache->set( $stashKey, $cacheData, $cache::TTL_MINUTE ); $this->logCache( 'store', $stashKey ); return Status::newGood(); } /** * Search the cache to find data for a previous execution done for the current edit. * * @return false|array False on failure, the array with data otherwise */ protected function seekCache() { $cache = ObjectCache::getLocalClusterInstance(); $stashKey = $this->getStashKey( $cache ); $ret = $cache->get( $stashKey ); $status = $ret !== false ? 'hit' : 'miss'; $this->logCache( $status, $stashKey ); return $ret; } /** * Get the stash key for the current variables * * @param BagOStuff $cache * @return string */ protected function getStashKey( BagOStuff $cache ) { $inputVars = $this->vars->exportNonLazyVars(); // Exclude noisy fields that have superficial changes $excludedVars = [ 'old_html' => true, 'new_html' => true, 'user_age' => true, 'timestamp' => true, 'page_age' => true, 'moved_from_age' => true, 'moved_to_age' => true ]; $inputVars = array_diff_key( $inputVars, $excludedVars ); ksort( $inputVars ); $hash = md5( serialize( $inputVars ) ); return $cache->makeKey( 'abusefilter', 'check-stash', $this->group, $hash, 'v2' ); } /** * Log cache operations related to stashed edits, i.e. store, hit and miss * * @param string $type Either 'store', 'hit' or 'miss' * @param string $key The cache key used * @throws InvalidArgumentException */ protected function logCache( $type, $key ) { if ( !in_array( $type, [ 'store', 'hit', 'miss' ] ) ) { throw new InvalidArgumentException( '$type must be either "store", "hit" or "miss"' ); } $logger = LoggerFactory::getInstance( 'StashEdit' ); // Bots do not use edit stashing, so avoid distorting the stats $statsd = $this->user->isBot() ? new NullStatsdDataFactory() : $this->statsdDataFactory; $logger->debug( __METHOD__ . ": cache $type for '{$this->title}' (key $key)." ); $statsd->increment( "abusefilter.check-stash.$type" ); } /** * Returns an associative array of filters which were tripped * * @protected Public for back compat only; this will actually be made protected in the future. * You should either rely on $this->run() or subclass this class. * @param bool|null &$hitCondLimit TEMPORARY * @return bool[] Map of (integer filter ID => bool) */ public function checkAllFilters( &$hitCondLimit = false ) : array { global $wgAbuseFilterCentralDB, $wgAbuseFilterIsCentral, $wgAbuseFilterConditionLimit; // Ensure that we start fresh, see T193374 $this->parser->resetCondCount(); $matchedFilters = []; foreach ( $this->filterLookup->getAllActiveFiltersInGroup( $this->group, false ) as $filter ) { // @phan-suppress-next-line PhanTypeMismatchDimAssignment $matchedFilters[$filter->getID()] = $this->checkFilter( $filter ); } if ( $wgAbuseFilterCentralDB && !$wgAbuseFilterIsCentral ) { foreach ( $this->filterLookup->getAllActiveFiltersInGroup( $this->group, true ) as $filter ) { // @phan-suppress-next-line PhanTypeMismatchArgumentNullable $matchedFilters[GlobalNameUtils::buildGlobalName( $filter->getID() )] = $this->checkFilter( $filter, true ); } } // Tag the action if the condition limit was hit $hitCondLimit = $this->parser->getCondCount() > $wgAbuseFilterConditionLimit; return $matchedFilters; } /** * Check the conditions of a single filter, and profile it if $this->executeMode is true * * @param Filter $filter * @param bool $global * @return bool */ protected function checkFilter( Filter $filter, $global = false ) { // @phan-suppress-next-line PhanTypeMismatchArgumentNullable $filterName = GlobalNameUtils::buildGlobalName( $filter->getID(), $global ); $startConds = $this->parser->getCondCount(); $startTime = microtime( true ); $origExtraTime = AFComputedVariable::$profilingExtraTime; $this->parser->setFilter( $filterName ); $result = $this->parser->checkConditions( $filter->getRules(), true, $filterName )->getResult(); $actualExtra = AFComputedVariable::$profilingExtraTime - $origExtraTime; $timeTaken = 1000 * ( microtime( true ) - $startTime - $actualExtra ); $condsUsed = $this->parser->getCondCount() - $startConds; $this->profilingData[$filterName] = [ 'time' => $timeTaken, 'conds' => $condsUsed, 'result' => $result ]; return $result; } /** * @param array $result Result of the execution, as created in run() * @param string[] $matchedFilters * @param string[] $allFilters */ protected function profileExecution( array $result, array $matchedFilters, array $allFilters ) { $this->filterProfiler->checkResetProfiling( $this->group, $allFilters ); $this->filterProfiler->recordRuntimeProfilingResult( count( $allFilters ), $result['condCount'], $result['runtime'] ); $this->filterProfiler->recordPerFilterProfiling( $this->title, $result['profiling'] ); $this->filterProfiler->recordStats( $this->group, $result['condCount'], $result['runtime'], (bool)$matchedFilters ); } /** * @return array */ private function getSpecsForTagger() : array { return [ 'action' => $this->action, 'username' => $this->user->getName(), 'target' => $this->title, 'accountname' => $this->vars->getVar( 'accountname', AbuseFilterVariableHolder::GET_BC )->toNative() ]; } }