mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/AbuseFilter.git
synced 2024-11-27 23:40:19 +00:00
Merge "Refactor tokenizer caching"
This commit is contained in:
commit
058e215882
|
@ -70,7 +70,7 @@ class AFPTreeParser {
|
|||
* @return AFPTreeNode|null
|
||||
*/
|
||||
public function parse( $code ) {
|
||||
$this->mTokens = AbuseFilterTokenizer::tokenize( $code );
|
||||
$this->mTokens = AbuseFilterTokenizer::getTokens( $code );
|
||||
$this->mPos = 0;
|
||||
|
||||
return $this->doLevelEntry();
|
||||
|
|
|
@ -204,7 +204,7 @@ class AbuseFilterParser {
|
|||
*/
|
||||
public function intEval( $code ) {
|
||||
// Reset all class members to their default value
|
||||
$this->mTokens = AbuseFilterTokenizer::tokenize( $code );
|
||||
$this->mTokens = AbuseFilterTokenizer::getTokens( $code );
|
||||
$this->mPos = 0;
|
||||
$this->mShortCircuit = false;
|
||||
|
||||
|
|
|
@ -66,46 +66,44 @@ class AbuseFilterTokenizer {
|
|||
'rlike', 'irlike', 'regex', 'if', 'then', 'else', 'end',
|
||||
];
|
||||
|
||||
/** @var BagOStuff */
|
||||
public static $tokenizerCache;
|
||||
|
||||
/**
|
||||
* Get a cache key used to store the tokenized code
|
||||
*
|
||||
* @param WANObjectCache $cache
|
||||
* @param string $code Not yet tokenized
|
||||
* @return string
|
||||
* @internal
|
||||
*/
|
||||
public static function getCacheKey( $code ) {
|
||||
return wfGlobalCacheKey( __CLASS__, self::CACHE_VERSION, crc32( $code ) );
|
||||
public static function getCacheKey( WANObjectCache $cache, $code ) {
|
||||
return $cache->makeGlobalKey( __CLASS__, crc32( $code ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the tokens for the given code.
|
||||
*
|
||||
* @param string $code
|
||||
* @return array[]
|
||||
*/
|
||||
public static function getTokens( $code ) {
|
||||
$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
|
||||
|
||||
$tokens = $cache->getWithSetCallback(
|
||||
self::getCacheKey( $cache, $code ),
|
||||
$cache::TTL_DAY,
|
||||
function ( $oldValue, &$ttl, array &$setOpts ) use ( $code ) {
|
||||
return self::tokenize( $code );
|
||||
},
|
||||
[ 'version' => self::CACHE_VERSION ]
|
||||
);
|
||||
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $code
|
||||
* @return array[]
|
||||
* @throws AFPException
|
||||
* @throws AFPUserVisibleException
|
||||
*/
|
||||
public static function tokenize( $code ) {
|
||||
if ( !self::$tokenizerCache ) {
|
||||
self::$tokenizerCache = ObjectCache::getLocalServerInstance( 'hash' );
|
||||
}
|
||||
|
||||
static $stats = null;
|
||||
|
||||
if ( !$stats ) {
|
||||
$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
|
||||
}
|
||||
|
||||
$cacheKey = self::getCacheKey( $code );
|
||||
|
||||
$tokens = self::$tokenizerCache->get( $cacheKey );
|
||||
|
||||
if ( $tokens ) {
|
||||
$stats->increment( 'abusefilter.tokenizerCache.hit' );
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
$stats->increment( 'abusefilter.tokenizerCache.miss' );
|
||||
private static function tokenize( $code ) {
|
||||
$tokens = [];
|
||||
$curPos = 0;
|
||||
|
||||
|
@ -115,8 +113,6 @@ class AbuseFilterTokenizer {
|
|||
$tokens[ $token->pos ] = [ $token, $curPos ];
|
||||
} while ( $curPos !== $prevPos );
|
||||
|
||||
self::$tokenizerCache->set( $cacheKey, $tokens, 60 * 60 * 24 );
|
||||
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
|
|
|
@ -149,16 +149,23 @@ class AbuseFilterTokenizerTest extends MediaWikiTestCase {
|
|||
* @dataProvider provideCode
|
||||
*/
|
||||
public function testCaching( $code ) {
|
||||
$cache = new HashBagOStuff();
|
||||
$this->setService( 'LocalServerObjectCache', $cache );
|
||||
$key = AbuseFilterTokenizer::getCacheKey( $code );
|
||||
$cache = new WANObjectCache( [ 'cache' => new HashBagOStuff() ] );
|
||||
$this->setService( 'MainWANObjectCache', $cache );
|
||||
|
||||
$key = AbuseFilterTokenizer::getCacheKey( $cache, $code );
|
||||
|
||||
// Other tests may have already cached the same code.
|
||||
$cache->delete( $key );
|
||||
// Static hell makes code difficult to test...
|
||||
AbuseFilterTokenizer::$tokenizerCache = null;
|
||||
AbuseFilterTokenizer::tokenize( $code );
|
||||
$this->assertNotFalse( $cache->get( $key ) );
|
||||
AbuseFilterTokenizer::getTokens( $code );
|
||||
$cached = $cache->getWithSetCallback(
|
||||
$key,
|
||||
$cache::TTL_DAY,
|
||||
function () {
|
||||
return false;
|
||||
},
|
||||
[ 'version' => 1 ]
|
||||
);
|
||||
$this->assertNotFalse( $cached );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue