Merge "Refactor tokenizer caching"

jenkins-bot 2019-05-24 19:09:03 +00:00 committed by Gerrit Code Review
commit 058e215882
4 changed files with 42 additions and 39 deletions


@@ -70,7 +70,7 @@ class AFPTreeParser {
 	 * @return AFPTreeNode|null
 	 */
 	public function parse( $code ) {
-		$this->mTokens = AbuseFilterTokenizer::tokenize( $code );
+		$this->mTokens = AbuseFilterTokenizer::getTokens( $code );
 		$this->mPos = 0;
 		return $this->doLevelEntry();


@@ -204,7 +204,7 @@ class AbuseFilterParser {
 	 */
 	public function intEval( $code ) {
 		// Reset all class members to their default value
-		$this->mTokens = AbuseFilterTokenizer::tokenize( $code );
+		$this->mTokens = AbuseFilterTokenizer::getTokens( $code );
 		$this->mPos = 0;
 		$this->mShortCircuit = false;


@@ -66,46 +66,44 @@ class AbuseFilterTokenizer {
 		'rlike', 'irlike', 'regex', 'if', 'then', 'else', 'end',
 	];
-	/** @var BagOStuff */
-	public static $tokenizerCache;
 	/**
 	 * Get a cache key used to store the tokenized code
 	 *
+	 * @param WANObjectCache $cache
 	 * @param string $code Not yet tokenized
 	 * @return string
+	 * @internal
 	 */
-	public static function getCacheKey( $code ) {
-		return wfGlobalCacheKey( __CLASS__, self::CACHE_VERSION, crc32( $code ) );
+	public static function getCacheKey( WANObjectCache $cache, $code ) {
+		return $cache->makeGlobalKey( __CLASS__, crc32( $code ) );
 	}
+	/**
+	 * Get the tokens for the given code.
+	 *
+	 * @param string $code
+	 * @return array[]
+	 */
+	public static function getTokens( $code ) {
+		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
+		$tokens = $cache->getWithSetCallback(
+			self::getCacheKey( $cache, $code ),
+			$cache::TTL_DAY,
+			function ( $oldValue, &$ttl, array &$setOpts ) use ( $code ) {
+				return self::tokenize( $code );
+			},
+			[ 'version' => self::CACHE_VERSION ]
+		);
+		return $tokens;
+	}
 	/**
 	 * @param string $code
 	 * @return array[]
 	 * @throws AFPException
 	 * @throws AFPUserVisibleException
 	 */
-	public static function tokenize( $code ) {
-		if ( !self::$tokenizerCache ) {
-			self::$tokenizerCache = ObjectCache::getLocalServerInstance( 'hash' );
-		}
-		static $stats = null;
-		if ( !$stats ) {
-			$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
-		}
-		$cacheKey = self::getCacheKey( $code );
-		$tokens = self::$tokenizerCache->get( $cacheKey );
-		if ( $tokens ) {
-			$stats->increment( 'abusefilter.tokenizerCache.hit' );
-			return $tokens;
-		}
-		$stats->increment( 'abusefilter.tokenizerCache.miss' );
+	private static function tokenize( $code ) {
 		$tokens = [];
 		$curPos = 0;
@@ -115,8 +113,6 @@ class AbuseFilterTokenizer {
 			$tokens[ $token->pos ] = [ $token, $curPos ];
 		} while ( $curPos !== $prevPos );
-		self::$tokenizerCache->set( $cacheKey, $tokens, 60 * 60 * 24 );
 		return $tokens;
 	}
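
For context, a standalone sketch of the caching pattern the new getTokens() relies on, using a WANObjectCache backed by a HashBagOStuff so it runs without MediaWiki's service container. The key components and the cached payload here are illustrative, not taken from the extension.

// Minimal sketch of the getWithSetCallback() pattern used above. Assumes the
// MediaWiki core classes WANObjectCache and HashBagOStuff are loaded.
$cache = new WANObjectCache( [ 'cache' => new HashBagOStuff() ] );
$code = '"foo" in added_lines';

$tokens = $cache->getWithSetCallback(
	// Same key shape as the new getCacheKey(): class name plus a CRC32 of the code.
	$cache->makeGlobalKey( 'AbuseFilterTokenizer', crc32( $code ) ),
	$cache::TTL_DAY,
	function ( $oldValue, &$ttl, array &$setOpts ) use ( $code ) {
		// The expensive tokenization runs only on a cache miss.
		return [ 'placeholder token list for: ' . $code ];
	},
	// The cache version now travels in the 'version' option rather than being
	// baked into the key, so bumping CACHE_VERSION makes old entries count as misses.
	[ 'version' => 1 ]
);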


@@ -149,16 +149,23 @@ class AbuseFilterTokenizerTest extends MediaWikiTestCase {
 	 * @dataProvider provideCode
 	 */
 	public function testCaching( $code ) {
-		$cache = new HashBagOStuff();
-		$this->setService( 'LocalServerObjectCache', $cache );
-		$key = AbuseFilterTokenizer::getCacheKey( $code );
+		$cache = new WANObjectCache( [ 'cache' => new HashBagOStuff() ] );
+		$this->setService( 'MainWANObjectCache', $cache );
+		$key = AbuseFilterTokenizer::getCacheKey( $cache, $code );
 		// Other tests may have already cached the same code.
 		$cache->delete( $key );
-		// Static hell makes code difficult to test...
-		AbuseFilterTokenizer::$tokenizerCache = null;
-		AbuseFilterTokenizer::tokenize( $code );
-		$this->assertNotFalse( $cache->get( $key ) );
+		AbuseFilterTokenizer::getTokens( $code );
+		$cached = $cache->getWithSetCallback(
+			$key,
+			$cache::TTL_DAY,
+			function () {
+				return false;
+			},
+			[ 'version' => 1 ]
+		);
+		$this->assertNotFalse( $cached );
 	}
 	/**
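
A note on the rewritten test: since getTokens() stores the token list via getWithSetCallback() with a 'version' option, the cached entry carries the version alongside the data, so the test reads it back through the same versioned getWithSetCallback() call rather than a plain get(). A callback that returns false only runs on a miss, so a non-false result shows the entry was cached. A standalone sketch of that round trip, with illustrative values and the same assumed core classes as above:

$cache = new WANObjectCache( [ 'cache' => new HashBagOStuff() ] );
$key = $cache->makeGlobalKey( 'AbuseFilterTokenizer', crc32( '1 + 1' ) );

// Simulate what getTokens() does on a miss: store a versioned entry.
$cache->getWithSetCallback(
	$key,
	$cache::TTL_DAY,
	function () {
		return [ 'fake token list' ];
	},
	[ 'version' => 1 ]
);

// Read it back the way testCaching() does; the callback is skipped on a hit.
$cached = $cache->getWithSetCallback(
	$key,
	$cache::TTL_DAY,
	function () {
		return false;
	},
	[ 'version' => 1 ]
);
// $cached is the stored [ 'fake token list' ], not false — a cache hit.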