mediawiki-extensions-AbuseF.../includes/AFComputedVariable.php

574 lines
18 KiB
PHP
Raw Normal View History

2009-02-27 00:03:30 +00:00
<?php
use MediaWiki\Extension\AbuseFilter\Hooks\AbuseFilterHookRunner;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
use Wikimedia\IPUtils;
use Wikimedia\Rdbms\Database;
2009-02-27 00:03:30 +00:00
class AFComputedVariable {
/**
* @var string The method used to compute the variable
*/
public $mMethod;
/**
* @var array Parameters to be used with the specified method
*/
public $mParameters;
/**
* @var User[] Cache containing User objects already constructed
*/
public static $userCache = [];
/**
* @var WikiPage[] Cache containing Page objects already constructed
*/
public static $articleCache = [];
/** @var float The amount of time to subtract from profiling */
public static $profilingExtraTime = 0;
/**
* @param string $method
* @param array $parameters
*/
public function __construct( $method, $parameters ) {
2009-02-27 00:03:30 +00:00
$this->mMethod = $method;
$this->mParameters = $parameters;
}
/**
* It's like Article::prepareContentForEdit, but not for editing (old wikitext usually)
*
*
* @param string $wikitext
* @param WikiPage $article
* @param User $user Context user
*
* @return stdClass
*/
public function parseNonEditWikitext( $wikitext, WikiPage $article, User $user ) {
static $cache = [];
$cacheKey = md5( $wikitext ) . ':' . $article->getTitle()->getPrefixedText();
2009-05-22 06:42:10 +00:00
if ( isset( $cache[$cacheKey] ) ) {
2009-02-27 00:03:30 +00:00
return $cache[$cacheKey];
2009-05-22 06:42:10 +00:00
}
$edit = (object)[];
$options = ParserOptions::newFromUser( $user );
$parser = MediaWikiServices::getInstance()->getParser();
$edit->output = $parser->parse( $wikitext, $article->getTitle(), $options );
2009-02-27 00:03:30 +00:00
$cache[$cacheKey] = $edit;
2009-02-27 00:03:30 +00:00
return $edit;
}
/**
AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6
2013-01-07 00:02:41 +00:00
* For backwards compatibility: Get the user object belonging to a certain name
* in case a user name is given as argument. Nowadays user objects are passed
* directly but many old log entries rely on this.
*
* @param string|User $user
* @return User
*/
public static function getUserObject( $user ) {
AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6
2013-01-07 00:02:41 +00:00
if ( $user instanceof User ) {
$username = $user->getName();
} else {
$username = $user;
if ( isset( self::$userCache[$username] ) ) {
return self::$userCache[$username];
}
$logger = LoggerFactory::getInstance( 'AbuseFilter' );
$logger->debug( "Couldn't find user $username in cache" );
AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6
2013-01-07 00:02:41 +00:00
}
if ( count( self::$userCache ) > 1000 ) {
self::$userCache = [];
}
AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6
2013-01-07 00:02:41 +00:00
if ( $user instanceof User ) {
$ret = $user;
} elseif ( IPUtils::isIPAddress( $username ) ) {
$ret = new User;
$ret->setName( $username );
} else {
$ret = User::newFromName( $username );
$ret->load();
}
self::$userCache[$username] = $ret;
return $ret;
2009-02-27 00:03:30 +00:00
}
/**
* @param int $namespace
* @param string $title
* @return WikiPage
*/
public function pageFromTitle( $namespace, $title ) {
2009-05-22 06:42:10 +00:00
if ( isset( self::$articleCache["$namespace:$title"] ) ) {
2009-02-27 00:03:30 +00:00
return self::$articleCache["$namespace:$title"];
2009-05-22 06:42:10 +00:00
}
if ( count( self::$articleCache ) > 1000 ) {
self::$articleCache = [];
}
$logger = LoggerFactory::getInstance( 'AbuseFilter' );
$logger->debug( "Creating wikipage object for $namespace:$title in cache" );
$t = $this->buildTitle( $namespace, $title );
self::$articleCache["$namespace:$title"] = WikiPage::factory( $t );
2009-02-27 00:03:30 +00:00
return self::$articleCache["$namespace:$title"];
}
/**
* Mockable wrapper
*
* @param int $namespace
* @param string $title
* @return Title
*/
protected function buildTitle( $namespace, $title ) : Title {
return Title::makeTitle( $namespace, $title );
}
/**
* @param WikiPage $article
* @return array
*/
public static function getLinksFromDB( WikiPage $article ) {
// Stolen from ConfirmEdit, SimpleCaptcha::getLinksFromTracker
$id = $article->getId();
2009-05-22 06:42:10 +00:00
if ( !$id ) {
return [];
2009-05-22 06:42:10 +00:00
}
$dbr = wfGetDB( DB_REPLICA );
return $dbr->selectFieldValues(
'externallinks',
'el_to',
[ 'el_from' => $id ],
__METHOD__
);
}
/**
* @param AbuseFilterVariableHolder $vars
* @return AFPData
* @throws MWException
* @throws AFPException
*/
public function compute( AbuseFilterVariableHolder $vars ) {
// phpcs:ignore MediaWiki.Usage.DeprecatedGlobalVariables.Deprecated$wgUser
global $wgUser;
// Used for parsing wikitext from saved revisions and checking for
// whether to show fields. Do not use $wgUser below here, in preparation
// for eventually injecting. See T246733
$computeForUser = $wgUser;
$vars->setLogger( LoggerFactory::getInstance( 'AbuseFilter' ) );
2009-02-27 00:03:30 +00:00
$parameters = $this->mParameters;
$result = null;
$hookRunner = AbuseFilterHookRunner::getRunner();
if ( !$hookRunner->onAbuseFilterInterceptVariable(
$this->mMethod,
$vars,
$parameters,
$result
) ) {
return $result instanceof AFPData
? $result : AFPData::newFromPHPVar( $result );
}
$services = MediaWikiServices::getInstance();
switch ( $this->mMethod ) {
2009-02-27 00:03:30 +00:00
case 'diff':
// Currently unused. Kept for backwards compatibility since it remains
// as mMethod for old variables. A fallthrough would instead change old results.
2009-02-27 00:03:30 +00:00
$text1Var = $parameters['oldtext-var'];
$text2Var = $parameters['newtext-var'];
$text1 = $vars->getVar( $text1Var )->toString();
$text2 = $vars->getVar( $text2Var )->toString();
$diffs = new Diff( explode( "\n", $text1 ), explode( "\n", $text2 ) );
$format = new UnifiedDiffFormatter();
$result = $format->format( $diffs );
2009-02-27 00:03:30 +00:00
break;
case 'diff-array':
// Introduced with T74329 to uniform the diff to MW's standard one.
// The difference with 'diff' method is noticeable when one of the
// $text is empty: it'll be treated as **really** empty, instead of
// an empty string.
$text1Var = $parameters['oldtext-var'];
$text2Var = $parameters['newtext-var'];
$text1 = $vars->getVar( $text1Var )->toString();
$text2 = $vars->getVar( $text2Var )->toString();
$text1 = $text1 === '' ? [] : explode( "\n", $text1 );
$text2 = $text2 === '' ? [] : explode( "\n", $text2 );
$diffs = new Diff( $text1, $text2 );
$format = new UnifiedDiffFormatter();
$result = $format->format( $diffs );
break;
2009-02-27 00:03:30 +00:00
case 'diff-split':
$diff = $vars->getVar( $parameters['diff-var'] )->toString();
$line_prefix = $parameters['line-prefix'];
$diff_lines = explode( "\n", $diff );
$result = [];
foreach ( $diff_lines as $line ) {
if ( substr( $line, 0, 1 ) === $line_prefix ) {
$result[] = substr( $line, strlen( $line_prefix ) );
2009-02-27 00:03:30 +00:00
}
}
break;
case 'links-from-wikitext':
// This should ONLY be used when sharing a parse operation with the edit.
/* @var WikiPage $article */
if ( isset( $parameters['article'] ) ) {
$article = $parameters['article'];
} else {
$article = $this->pageFromTitle(
$parameters['namespace'],
$parameters['title']
);
}
if ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
// Shared with the edit, don't count it in profiling
$startTime = microtime( true );
$textVar = $parameters['text-var'];
$new_text = $vars->getVar( $textVar )->toString();
$content = ContentHandler::makeContent( $new_text, $article->getTitle() );
try {
// @fixme TEMPORARY WORKAROUND FOR T187153
$editInfo = $article->prepareContentForEdit( $content );
$links = array_keys( $editInfo->output->getExternalLinks() );
} catch ( Error $e ) {
$logger = LoggerFactory::getInstance( 'AbuseFilter' );
$logger->warning( 'Caught Error, case 1 - T187153' );
$links = [];
}
$result = $links;
self::$profilingExtraTime += ( microtime( true ) - $startTime );
break;
}
// Otherwise fall back to database
2009-03-19 03:10:18 +00:00
case 'links-from-wikitext-nonedit':
case 'links-from-wikitext-or-database':
// TODO: use Content object instead, if available!
$article = $this->pageFromTitle(
$parameters['namespace'],
$parameters['title']
);
$logger = LoggerFactory::getInstance( 'AbuseFilter' );
if ( $vars->forFilter ) {
$links = $this->getLinksFromDB( $article );
$logger->debug( 'Loading old links from DB' );
} elseif ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
$logger->debug( 'Loading old links from Parser' );
$textVar = $parameters['text-var'];
$wikitext = $vars->getVar( $textVar )->toString();
$editInfo = $this->parseNonEditWikitext(
$wikitext,
$article,
$computeForUser
);
$links = array_keys( $editInfo->output->getExternalLinks() );
} else {
// TODO: Get links from Content object. But we don't have the content object.
// And for non-text content, $wikitext is usually not going to be a valid
// serialization, but rather some dummy text for filtering.
$links = [];
}
$result = $links;
2009-02-27 00:03:30 +00:00
break;
case 'link-diff-added':
case 'link-diff-removed':
$oldLinkVar = $parameters['oldlink-var'];
$newLinkVar = $parameters['newlink-var'];
2009-02-27 00:03:30 +00:00
$oldLinks = $vars->getVar( $oldLinkVar )->toString();
$newLinks = $vars->getVar( $newLinkVar )->toString();
2009-02-27 00:03:30 +00:00
$oldLinks = explode( "\n", $oldLinks );
$newLinks = explode( "\n", $newLinks );
if ( $this->mMethod === 'link-diff-added' ) {
$result = array_diff( $newLinks, $oldLinks );
}
if ( $this->mMethod === 'link-diff-removed' ) {
$result = array_diff( $oldLinks, $newLinks );
}
2009-02-27 00:03:30 +00:00
break;
case 'parse-wikitext':
// Should ONLY be used when sharing a parse operation with the edit.
if ( isset( $parameters['article'] ) ) {
$article = $parameters['article'];
} else {
$article = $this->pageFromTitle(
$parameters['namespace'],
$parameters['title']
);
}
if ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
// Shared with the edit, don't count it in profiling
$startTime = microtime( true );
$textVar = $parameters['wikitext-var'];
$new_text = $vars->getVar( $textVar )->toString();
$content = ContentHandler::makeContent( $new_text, $article->getTitle() );
try {
// @fixme TEMPORARY WORKAROUND FOR T187153
$editInfo = $article->prepareContentForEdit( $content );
} catch ( Error $e ) {
$logger = LoggerFactory::getInstance( 'AbuseFilter' );
$logger->warning( 'Caught Error, case 2 - T187153' );
$result = '';
break;
}
if ( isset( $parameters['pst'] ) && $parameters['pst'] ) {
$result = $editInfo->pstContent->serialize( $editInfo->format );
} else {
$newHTML = $editInfo->output->getText();
// Kill the PP limit comments. Ideally we'd just remove these by not setting the
// parser option, but then we can't share a parse operation with the edit, which is bad.
// @fixme No awfulness scale can measure how awful this hack is.
$re = '/<!--\s*NewPP limit [^>]*-->\s*(?:<!--\s*Transclusion [^>]+-->\s*)?(?:<\/div>\s*)?$/i';
$result = preg_replace( $re, '', $newHTML );
}
self::$profilingExtraTime += ( microtime( true ) - $startTime );
break;
}
// Otherwise fall back to database
2009-02-27 00:03:30 +00:00
case 'parse-wikitext-nonedit':
// TODO: use Content object instead, if available!
$article = $this->pageFromTitle( $parameters['namespace'], $parameters['title'] );
2009-02-27 00:03:30 +00:00
$textVar = $parameters['wikitext-var'];
if ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
if ( isset( $parameters['pst'] ) && $parameters['pst'] ) {
// $textVar is already PSTed when it's not loaded from an ongoing edit.
$result = $vars->getVar( $textVar )->toString();
} else {
$text = $vars->getVar( $textVar )->toString();
$editInfo = $this->parseNonEditWikitext(
$text,
$article,
$computeForUser
);
$result = $editInfo->output->getText();
}
} else {
// TODO: Parser Output from Content object. But we don't have the content object.
// And for non-text content, $wikitext is usually not going to be a valid
// serialization, but rather some dummy text for filtering.
$result = '';
}
2009-02-27 00:03:30 +00:00
break;
case 'strip-html':
$htmlVar = $parameters['html-var'];
$html = $vars->getVar( $htmlVar )->toString();
$stripped = StringUtils::delimiterReplace( '<', '>', '', $html );
// We strip extra spaces to the right because the stripping above
// could leave a lot of whitespace.
// @fixme Find a better way to do this.
$result = TextContent::normalizeLineEndings( $stripped );
2009-02-27 00:03:30 +00:00
break;
case 'load-recent-authors':
$title = $this->buildTitle( $parameters['namespace'], $parameters['title'] );
if ( !$title->exists() ) {
$result = '';
break;
}
$result = self::getLastPageAuthors( $title );
break;
case 'load-first-author':
$title = $this->buildTitle( $parameters['namespace'], $parameters['title'] );
$revision = $services->getRevisionLookup()->getFirstRevision( $title );
if ( $revision ) {
$user = $revision->getUser();
$result = $user === null ? '' : $user->getName();
} else {
$result = '';
}
2009-02-27 00:03:30 +00:00
break;
case 'get-page-restrictions':
$action = $parameters['action'];
$title = $this->buildTitle( $parameters['namespace'], $parameters['title'] );
$result = $title->getRestrictions( $action );
2009-02-27 00:03:30 +00:00
break;
case 'simple-user-accessor':
$user = $parameters['user'];
$method = $parameters['method'];
if ( !$user ) {
throw new MWException( 'No user parameter given.' );
2009-02-27 13:16:34 +00:00
}
AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6
2013-01-07 00:02:41 +00:00
$obj = self::getUserObject( $user );
if ( !$obj ) {
2009-02-27 13:16:34 +00:00
throw new MWException( "Invalid username $user" );
}
$result = call_user_func( [ $obj, $method ] );
2009-02-27 00:03:30 +00:00
break;
case 'user-block':
// @todo Support partial blocks
$user = $parameters['user'];
$result = (bool)$user->getBlock();
break;
2009-02-27 00:03:30 +00:00
case 'user-age':
$user = $parameters['user'];
$asOf = $parameters['asof'];
AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6
2013-01-07 00:02:41 +00:00
$obj = self::getUserObject( $user );
$registration = $obj->getRegistration();
if ( $obj->getId() === 0 ) {
$result = 0;
} else {
// HACK: If there's no registration date, assume 2008-01-15, Wikipedia Day
// in the year before the new user log was created. See T243469.
if ( $registration === null ) {
$registration = "20080115000000";
}
$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $registration );
}
2009-02-27 00:03:30 +00:00
break;
case 'page-age':
$title = $this->buildTitle( $parameters['namespace'], $parameters['title'] );
$firstRevisionTime = $title->getEarliestRevTime();
if ( !$firstRevisionTime ) {
$result = 0;
break;
}
$asOf = $parameters['asof'];
$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $firstRevisionTime );
break;
2009-02-27 00:03:30 +00:00
case 'user-groups':
AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6
2013-01-07 00:02:41 +00:00
// Deprecated but needed by old log entries
2009-02-27 00:03:30 +00:00
$user = $parameters['user'];
AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6
2013-01-07 00:02:41 +00:00
$obj = self::getUserObject( $user );
$result = $obj->getEffectiveGroups();
2009-02-27 00:03:30 +00:00
break;
case 'length':
$s = $vars->getVar( $parameters['length-var'] )->toString();
$result = strlen( $s );
break;
case 'subtract':
// Currently unused, kept for backwards compatibility for old filters.
$v1 = $vars->getVar( $parameters['val1-var'] )->toFloat();
$v2 = $vars->getVar( $parameters['val2-var'] )->toFloat();
$result = $v1 - $v2;
break;
case 'subtract-int':
$v1 = $vars->getVar( $parameters['val1-var'] )->toInt();
$v2 = $vars->getVar( $parameters['val2-var'] )->toInt();
2009-02-27 00:03:30 +00:00
$result = $v1 - $v2;
break;
case 'revision-text-by-id':
$revRec = $services
->getRevisionLookup()
->getRevisionById( $parameters['revid'] );
$result = AbuseFilter::revisionToString( $revRec, $computeForUser );
2009-02-27 00:03:30 +00:00
break;
case 'revision-text-by-timestamp':
$timestamp = $parameters['timestamp'];
if ( $timestamp === null ) {
// Temporary BC for T246539#6388362
$result = '[Revision text not available]';
break;
}
$title = $this->buildTitle( $parameters['namespace'], $parameters['title'] );
$revRec = $services
->getRevisionStore()
->getRevisionByTimestamp( $title, $timestamp );
$result = AbuseFilter::revisionToString( $revRec, $computeForUser );
2009-02-27 00:03:30 +00:00
break;
case 'get-wiki-name':
$result = WikiMap::getCurrentWikiDbDomain()->getId();
break;
case 'get-wiki-language':
$result = $services->getContentLanguage()->getCode();
break;
2009-02-27 00:03:30 +00:00
default:
if ( $hookRunner->onAbuseFilterComputeVariable(
$this->mMethod,
$vars,
$parameters,
$result
) ) {
throw new AFPException( 'Unknown variable compute type ' . $this->mMethod );
}
2009-02-27 00:03:30 +00:00
}
return $result instanceof AFPData
? $result : AFPData::newFromPHPVar( $result );
2009-02-27 00:03:30 +00:00
}
/**
* @param Title $title
* @return string[] Usernames of the last 10 (unique) authors from $title
*/
public static function getLastPageAuthors( Title $title ) {
if ( !$title->exists() ) {
return [];
}
$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
$fname = __METHOD__;
return $cache->getWithSetCallback(
$cache->makeKey( 'last-10-authors', 'revision', $title->getLatestRevID() ),
$cache::TTL_MINUTE,
function ( $oldValue, &$ttl, array &$setOpts ) use ( $title, $fname ) {
$dbr = wfGetDB( DB_REPLICA );
$setOpts += Database::getCacheSetOptions( $dbr );
// Get the last 100 edit authors with a trivial query (avoid T116557)
$revQuery = MediaWikiServices::getInstance()->getRevisionStore()->getQueryInfo();
$revAuthors = $dbr->selectFieldValues(
$revQuery['tables'],
$revQuery['fields']['rev_user_text'],
[ 'rev_page' => $title->getArticleID() ],
$fname,
// Some pages have < 10 authors but many revisions (e.g. bot pages)
[ 'ORDER BY' => 'rev_timestamp DESC, rev_id DESC',
'LIMIT' => 100,
// Force index per T116557
'USE INDEX' => [ 'revision' => 'page_timestamp' ],
],
$revQuery['joins']
);
// Get the last 10 distinct authors within this set of edits
$users = [];
foreach ( $revAuthors as $author ) {
$users[$author] = 1;
if ( count( $users ) >= 10 ) {
break;
}
}
return array_keys( $users );
}
);
}
}