From 42bd0d84f4244ca2304c6d161f71d90a6a53030c Mon Sep 17 00:00:00 2001 From: Marius Hoch Date: Mon, 7 Jan 2013 01:02:41 +0100 Subject: [PATCH] AbuseFilter: Change format of database logging/ performance AF is setting several lazy load variables for the currently editing user. To do this it's passing along the user name extracted from a user object and generating a new user object later from that name which is of course pointless. With this patch I'll pass user objects directly to prevent that. On top of that I've deprecated a method in AFComputedVariable::compute which was redundant as there is a more generic one which can solve that task just fine. Furthermore I've changed the logging behaviour from serializing the whole AbuseFilterVariableHolder object to only store the variables. That has two major advantages: * The amount of data that needs to be saved on a filter hit is reduced to about 1/10 of what the old version needed. * This is much more forward compatible as the old way of saving this relied on the class structure to stay the same while this is a simple array containing the vars. On top of that we now only log variables already set by the time a filter is hit. On top of the obvious performance increasement that makes it easier for the user to spot the relevant data. Another thing this change alters is the way the AbuseFilter internally works with AbuseFilterVariableHolder objects. Right now we use one for testing the filter(s) and later we use another one to compute the same data again in case a filter was hit (for logging)! This is not thoroughly tested yet, but way more sane than what we're currently doing! Change-Id: Ib15e7501bff32a54afe2d103ef5aedb950e58ef6 --- AbuseFilter.class.php | 72 ++++++++++++++++++---------- AbuseFilter.hooks.php | 2 +- AbuseFilter.parser.php | 11 ++++- AbuseFilterVariableHolder.php | 88 +++++++++++++++++++++++++++++++---- 4 files changed, 136 insertions(+), 37 deletions(-) diff --git a/AbuseFilter.class.php b/AbuseFilter.class.php index 6c9958476..7f4e7b5be 100644 --- a/AbuseFilter.class.php +++ b/AbuseFilter.class.php @@ -186,17 +186,37 @@ class AbuseFilter { public static function generateUserVars( $user ) { $vars = new AbuseFilterVariableHolder; - $vars->setLazyLoadVar( 'user_editcount', 'simple-user-accessor', - array( 'user' => $user->getName(), 'method' => 'getEditCount' ) ); - $vars->setVar( 'user_name', $user->getName() ); - $vars->setLazyLoadVar( 'user_emailconfirm', 'simple-user-accessor', - array( 'user' => $user->getName(), 'method' => 'getEmailAuthenticationTimestamp' ) ); + $vars->setLazyLoadVar( + 'user_editcount', + 'simple-user-accessor', + array( 'user' => $user, 'method' => 'getEditCount' ) + ); - $vars->setLazyLoadVar( 'user_age', 'user-age', - array( 'user' => $user->getName(), 'asof' => wfTimestampNow() ) ); - $vars->setLazyLoadVar( 'user_groups', 'user-groups', array( 'user' => $user->getName() ) ); - $vars->setLazyLoadVar( 'user_blocked', 'simple-user-accessor', - array( 'user' => $user->getName(), 'method' => 'isBlocked' ) ); + $vars->setVar( 'user_name', $user->getName() ); + + $vars->setLazyLoadVar( + 'user_emailconfirm', + 'simple-user-accessor', + array( 'user' => $user, 'method' => 'getEmailAuthenticationTimestamp' ) + ); + + $vars->setLazyLoadVar( + 'user_age', + 'user-age', + array( 'user' => $user, 'asof' => wfTimestampNow() ) + ); + + $vars->setLazyLoadVar( + 'user_groups', + 'simple-user-accessor', + array( 'user' => $user, 'method' => 'getEffectiveGroups' ) + ); + + $vars->setLazyLoadVar( + 'user_blocked', + 'simple-user-accessor', + array( 'user' => $user, 'method' => 'isBlocked' ) + ); wfRunHooks( 'AbuseFilter-generateUserVars', array( $vars, $user ) ); @@ -341,9 +361,7 @@ class AbuseFilter { /** * @var $parser AbuseFilterParser */ - $parser = new $wgAbuseFilterParserClass; - - $parser->setVars( $vars ); + $parser = new $wgAbuseFilterParserClass( $vars ); return $parser->evaluateExpression( $expr ); } @@ -369,9 +387,7 @@ class AbuseFilter { /** * @var $parser AbuseFilterParser */ - $parser = new $wgAbuseFilterParserClass; - - $parser->setVars( $vars ); + $parser = new $wgAbuseFilterParserClass( $vars ); } try { @@ -985,7 +1001,7 @@ class AbuseFilter { // Global stuff if ( count( $logged_global_filters ) ) { $vars->computeDBVars(); - $global_var_dump = self::storeVarDump( $vars, 'global' ); + $global_var_dump = self::storeVarDump( $vars, true ); $global_var_dump = "stored-text:$global_var_dump"; foreach ( $central_log_rows as $index => $data ) { $central_log_rows[$index]['afl_var_dump'] = $global_var_dump; @@ -1022,7 +1038,7 @@ class AbuseFilter { * Store a var dump to External Storage or the text table * Some of this code is stolen from Revision::insertOn and friends * - * @param $vars array + * @param $vars AbuseFilterVariableHolder * @param $global bool * * @return int @@ -1032,13 +1048,13 @@ class AbuseFilter { global $wgCompressRevisions; - if ( is_array( $vars ) || is_object( $vars ) ) { - $text = serialize( $vars ); - } else { - $text = $vars; - } + // Get all variables yet set and compute old and new wikitext if not yet done + // as those are needed for the diff view on top of the abuse log pages + $vars = $vars->dumpAllVars( array( 'old_wikitext', 'new_wikitext' ) ); - $flags = array(); + // Vars is an array with native PHP data types (non-objects) now + $text = serialize( $vars ); + $flags = array( 'nativeDataArray' ); if ( $wgCompressRevisions ) { if ( function_exists( 'gzdeflate' ) ) { @@ -1130,6 +1146,14 @@ class AbuseFilter { $obj = unserialize( $text ); + if ( in_array( 'nativeDataArray', $flags ) ) { + $vars = $obj; + $obj = new AbuseFilterVariableHolder(); + foreach( $vars as $key => $value ) { + $obj->setVar( $key, $value ); + } + } + wfProfileOut( __METHOD__ ); return $obj; } diff --git a/AbuseFilter.hooks.php b/AbuseFilter.hooks.php index a6edf6b5d..fed4512e0 100644 --- a/AbuseFilter.hooks.php +++ b/AbuseFilter.hooks.php @@ -115,7 +115,7 @@ class AbuseFilterHooks { } $vars->addHolder( AbuseFilter::generateUserVars( $user ) ); - $vars->addHolder( AbuseFilter::generateTitleVars( $title , 'article' ) ); + $vars->addHolder( AbuseFilter::generateTitleVars( $title , 'ARTICLE' ) ); $vars->setVar( 'action', 'edit' ); $vars->setVar( 'summary', $summary ); diff --git a/AbuseFilter.parser.php b/AbuseFilter.parser.php index 45813677d..f38f16850 100644 --- a/AbuseFilter.parser.php +++ b/AbuseFilter.parser.php @@ -632,8 +632,16 @@ class AbuseFilterParser { static $lastHandledToken = array(); - public function __construct() { + /** + * Create a new instance + * + * @param $vars AbuseFilterVariableHolder + */ + public function __construct( $vars = null ) { $this->resetState(); + if ( $vars instanceof AbuseFilterVariableHolder ) { + $this->mVars = $vars; + } } public function resetState() { @@ -668,7 +676,6 @@ class AbuseFilterParser { * @param $value */ public function setVar( $name, $value ) { - $name = strtolower( $name ); $this->mVars->setVar( $name, $value ); } diff --git a/AbuseFilterVariableHolder.php b/AbuseFilterVariableHolder.php index acca1a1f2..812712a07 100644 --- a/AbuseFilterVariableHolder.php +++ b/AbuseFilterVariableHolder.php @@ -27,7 +27,9 @@ class AbuseFilterVariableHolder { } /** - * @param $variable + * Get a variable from the current object + * + * @param $variable string * @return AFPData */ function getVar( $variable ) { @@ -74,6 +76,8 @@ class AbuseFilterVariableHolder { } /** + * Export all variables stored in this object as string + * * @return array */ function exportAllVars() { @@ -89,6 +93,55 @@ class AbuseFilterVariableHolder { return $exported; } + /** + * Dump all variables stored in this object in their native types. + * If you want a not yet set variable to be included in the results you can either set $compute to an array + * with the name of the variable or set $compute to true to compute all not yet set variables. + * + * @param $compute array|bool Variables we should copute if not yet set + * @param $includeUserVars bool Include user set variables + * @return array + */ + public function dumpAllVars( $compute = array(), $includeUserVars = false ) { + $allVarNames = array_keys( $this->mVars ); + $exported = array(); + + if ( !$includeUserVars ) { + // Compile a list of all variables set by the extension to be able to filter user set ones by name + global $wgRestrictionTypes; + + $coreVariables = AbuseFilter::getBuilderValues(); + $coreVariables = array_keys( $coreVariables['vars'] ); + + // Title vars can have several prefixes + $prefixes = array( 'ARTICLE', 'MOVED_FROM', 'MOVED_TO', 'FILE' ); + $titleVars = array( '_ARTICLEID', '_NAMESPACE', '_TEXT', '_PREFIXEDTEXT', '_recent_contributors' ); + foreach ( $wgRestrictionTypes as $action ) { + $titleVars[] = "_restrictions_$action"; + } + + foreach ( $titleVars as $var ) { + foreach ( $prefixes as $prefix ) { + $coreVariables[] = $prefix . $var; + } + } + $coreVariables = array_map( 'strtolower', $coreVariables ); + } + + foreach ( $allVarNames as $varName ) { + if ( + ( $includeUserVars || in_array( strtolower( $varName ), $coreVariables ) ) && + // Only include variables set in the extension in case $includeUserVars is false + !in_array( $varName, self::$varBlacklist ) && + ( $compute === true || ( is_array( $compute ) && in_array( $varName, $compute ) ) || $this->mVars[$varName] instanceof AFPData ) + ) { + $exported[$varName] = $this->getVar( $varName )->toNative(); + } + } + + return $exported; + } + /** * @param $var * @return bool @@ -166,20 +219,34 @@ class AFComputedVariable { } /** - * @param $username string + * For backwards compatibility: Get the user object belonging to a certain name + * in case a user name is given as argument. Nowadays user objects are passed + * directly but many old log entries rely on this. + * + * @param $user string|User * @return User */ - static function userObjectFromName( $username ) { - if ( isset( self::$userCache[$username] ) ) { - return self::$userCache[$username]; - } + static function getUserObject( $user ) { + if ( $user instanceof User ) { + $username = $user->getName(); + } else { + $username = $user; + if ( isset( self::$userCache[$username] ) ) { + return self::$userCache[$username]; + } - wfDebug( "Couldn't find user $username in cache\n" ); + wfDebug( "Couldn't find user $username in cache\n" ); + } if ( count( self::$userCache ) > 1000 ) { self::$userCache = array(); } + if ( $user instanceof User ) { + $userCache[$username] = $user; + return $user; + } + if ( IP::isIPAddress( $username ) ) { $u = new User; $u->setName( $username ); @@ -429,7 +496,7 @@ class AFComputedVariable { throw new MWException( 'No user parameter given.' ); } - $obj = self::userObjectFromName( $user ); + $obj = self::getUserObject( $user ); if ( !$obj ) { throw new MWException( "Invalid username $user" ); @@ -440,7 +507,7 @@ class AFComputedVariable { case 'user-age': $user = $parameters['user']; $asOf = $parameters['asof']; - $obj = self::userObjectFromName( $user ); + $obj = self::getUserObject( $user ); if ( $obj->getId() == 0 ) { $result = 0; @@ -451,8 +518,9 @@ class AFComputedVariable { $result = wfTimestamp( TS_UNIX, $asOf ) - wfTimestampOrNull( TS_UNIX, $registration ); break; case 'user-groups': + // Deprecated but needed by old log entries $user = $parameters['user']; - $obj = self::userObjectFromName( $user ); + $obj = self::getUserObject( $user ); $result = $obj->getEffectiveGroups(); break; case 'length':