Merge "Store references in page_props and cache"

This commit is contained in:
jenkins-bot 2016-02-12 19:56:34 +00:00 committed by Gerrit Code Review
commit 068c7a84e1
4 changed files with 266 additions and 2 deletions

View file

@ -65,4 +65,84 @@ class CiteHooks {
return true;
}
/**
* Callback for LinksUpdateConstructed hook
* If $wgCiteCacheReferencesDataOnParse is set to true, caches the raw references
* in array form
*
* @param LinksUpdate $linksUpdate
*/
public static function onLinksUpdateConstructed( LinksUpdate &$linksUpdate ) {
	// The setting is registered as "CiteCacheReferencesDataOnParse" in extension.json
	// and documented under that name in the README. Reading a differently named
	// global yields null, which would make this hook return early every time.
	global $wgCiteStoreReferencesData, $wgCiteCacheReferencesDataOnParse;
	if ( !$wgCiteStoreReferencesData || !$wgCiteCacheReferencesDataOnParse ) {
		return;
	}

	$refs = $linksUpdate->getParserOutput()->getExtensionData( Cite::EXT_DATA_KEY );
	if ( $refs === null ) {
		// the parser saved no references data for this page, nothing to cache
		return;
	}

	// cache the raw array under a key derived from the page id
	$cache = ObjectCache::getMainWANInstance();
	$articleID = $linksUpdate->getTitle()->getArticleID();
	$key = $cache->makeKey( Cite::EXT_DATA_KEY, $articleID );
	$cache->set( $key, $refs, Cite::CACHE_DURATION_ONPARSE );
}
/**
* Callback for LinksUpdate hook
* Post-output processing of references property, for proper db storage
* Deferred to avoid performance overhead when outputting the page
*
* @param LinksUpdate $linksUpdate
*/
public static function onLinksUpdate( LinksUpdate &$linksUpdate ) {
	global $wgCiteStoreReferencesData;
	if ( !$wgCiteStoreReferencesData ) {
		return;
	}

	$parserOutput = $linksUpdate->getParserOutput();
	$refs = $parserOutput->getExtensionData( Cite::EXT_DATA_KEY );
	if ( $refs === null ) {
		// no references data was saved for this page
		return;
	}

	// JSON encode, then GZIP encode at maximum compression.
	// Both steps return false on failure; in that case nothing must be stored,
	// otherwise str_split( false ) would produce a bogus empty 'references-1' value.
	$json = FormatJson::encode( $refs, false, FormatJson::ALL_OK );
	$compressed = ( $json === false ) ? false : gzencode( $json, 9 );

	if ( $compressed === false ) {
		$id = $linksUpdate->getTitle()->getArticleID();
		wfDebug( "Failed to encode references data for title id $id" );
	} else {
		// split the string in smaller parts that can fit into a db blob;
		// parts are stored as references-1, references-2, ...
		$chunks = str_split( $compressed, Cite::MAX_STORAGE_LENGTH );
		foreach ( $chunks as $num => $chunk ) {
			$linksUpdate->mProperties['references-' . ( $num + 1 )] = $chunk;
		}
	}

	// the raw array has been consumed, drop it from the parser output
	$parserOutput->setExtensionData( Cite::EXT_DATA_KEY, null );
}
/**
* Callback for LinksUpdateComplete hook
* If $wgCiteCacheReferencesDataOnParse is set to false, purges the cache
* when references are modified
*
* @param LinksUpdate $linksUpdate
*/
public static function onLinksUpdateComplete( LinksUpdate &$linksUpdate ) {
	// The setting is registered as "CiteCacheReferencesDataOnParse" in extension.json
	// and documented under that name in the README. Reading a differently named
	// global yields null, so the purge below would run even with cache-on-parse enabled.
	global $wgCiteStoreReferencesData, $wgCiteCacheReferencesDataOnParse;
	if ( !$wgCiteStoreReferencesData || $wgCiteCacheReferencesDataOnParse ) {
		return;
	}

	// if we can, avoid clearing the cache when references were not changed
	// (the accessors are probed because not every LinksUpdate version offers them)
	if ( method_exists( $linksUpdate, 'getAddedProperties' )
		&& method_exists( $linksUpdate, 'getRemovedProperties' )
	) {
		$addedProps = $linksUpdate->getAddedProperties();
		$removedProps = $linksUpdate->getRemovedProperties();
		// 'references-1' is always the first stored chunk, so it is present in
		// one of the two lists whenever references were added or removed
		if ( !isset( $addedProps['references-1'] )
			&& !isset( $removedProps['references-1'] )
		) {
			return;
		}
	}

	$cache = ObjectCache::getMainWANInstance();
	$articleID = $linksUpdate->getTitle()->getArticleID();
	$key = $cache->makeKey( Cite::EXT_DATA_KEY, $articleID );
	// delete with reduced hold off period (LinksUpdate uses a master connection)
	$cache->delete( $key, WANObjectCache::MAX_COMMIT_DELAY );
}
}

View file

@ -30,6 +30,32 @@ class Cite {
*/
const DEFAULT_GROUP = '';
/**
* Maximum number of bytes stored in a single pp_value field of the page_props table.
* CiteHooks::onLinksUpdate() splits the compressed references data into chunks of
* at most this length.
* @todo Find a way to retrieve this information from the DBAL
*/
const MAX_STORAGE_LENGTH = 65535; // Size of MySQL 'blob' field
/**
* Key under which references data is kept in the parser output's ExtensionData;
* also used as a component of the ObjectCache key
*/
const EXT_DATA_KEY = 'Cite:References';
/**
* Version number saved alongside the references data (under its 'version' entry),
* in case we change the data structure in the future
*/
const DATA_VERSION_NUMBER = 1;
/**
* Cache duration, in seconds, set when parsing a page with references
*/
const CACHE_DURATION_ONPARSE = 3600; // 1 hour
/**
* Cache duration, in seconds, set when fetching references from db
*/
const CACHE_DURATION_ONFETCH = 18000; // 5 hours
/**#@+
* @access private
*/
@ -149,6 +175,11 @@ class Cite {
*/
public $mRefCallStack = array();
/**
* @var bool
*/
private $mBumpRefData = false;
/**
* Did we install us into $wgHooks yet?
* @var Boolean
@ -187,6 +218,10 @@ class Cite {
$frame->setVolatile();
}
// new <ref> tag, we may need to bump the ref data counter
// to avoid overwriting a previous group
$this->mBumpRefData = true;
return $ret;
}
@ -724,6 +759,11 @@ class Cite {
$ret = $this->mParser->unserializeHalfParsedText( $data );
}
if ( !$this->mParser->getOptions()->getIsPreview() ) {
// save references data for later use by LinksUpdate hooks
$this->saveReferencesData( $group );
}
// done, clean up so we can reuse the group
unset( $this->mRefs[$group] );
unset( $this->mGroupCnt[$group] );
@ -1105,7 +1145,15 @@ class Cite {
return true;
}
$isSectionPreview = $parser->getOptions()->getIsSectionPreview();
if ( !$parser->getOptions()->getIsPreview() ) {
// save references data for later use by LinksUpdate hooks
if ( $this->mRefs && isset( $this->mRefs[self::DEFAULT_GROUP] ) ) {
$this->saveReferencesData();
}
$isSectionPreview = false;
} else {
$isSectionPreview = $parser->getOptions()->getIsSectionPreview();
}
$s = '';
foreach ( $this->mRefs as $group => $refs ) {
@ -1131,6 +1179,40 @@ class Cite {
return true;
}
/**
* Saves references in parser extension data
* This is called by each <references/> tag, and by checkRefsNoReferences
* Assumes $this->mRefs[$group] is set
*
* @param $group
*/
private function saveReferencesData( $group = self::DEFAULT_GROUP ) {
	global $wgCiteStoreReferencesData;
	if ( !$wgCiteStoreReferencesData ) {
		return;
	}

	$output = $this->mParser->getOutput();
	$savedRefs = $output->getExtensionData( self::EXT_DATA_KEY );
	if ( $savedRefs === null ) {
		// Initialize array structure
		$savedRefs = array(
			'refs' => array(),
			'version' => self::DATA_VERSION_NUMBER,
		);
	}

	// Open a new entry when a <ref> tag was seen since the last save. This handles
	// pages with multiple <references/> tags with <ref> tags in between: a group can
	// appear several times, so a previous appearance must not be overwritten.
	// An entry is also opened when the list is still empty, otherwise the index
	// computed below would be -1 and the group would be stored under a bogus key.
	if ( $this->mBumpRefData || !$savedRefs['refs'] ) {
		$savedRefs['refs'][] = array();
		$this->mBumpRefData = false;
	}

	// save group into the most recent entry
	$n = count( $savedRefs['refs'] ) - 1;
	$savedRefs['refs'][$n][$group] = $this->mRefs[$group];

	$output->setExtensionData( self::EXT_DATA_KEY, $savedRefs );
}
/**
* Hook for the InlineEditor extension.
* If any ref or reference reference tag is in the text,
@ -1219,5 +1301,82 @@ class Cite {
return $ret;
}
/**
* Fetch references stored for the given title in page_props
* For performance, results are cached
*
* @param Title $title
* @return array|false
*/
public static function getStoredReferences( Title $title ) {
	global $wgCiteStoreReferencesData;

	if ( $wgCiteStoreReferencesData ) {
		$wanCache = ObjectCache::getMainWANInstance();
		$cacheKey = $wanCache->makeKey( self::EXT_DATA_KEY, $title->getArticleID() );

		// Callback handed to the cache: reads the references from page_props
		$loadFromDb = function ( $oldValue, &$ttl, array &$setOpts ) use ( $title ) {
			$db = wfGetDB( DB_SLAVE );
			$setOpts += Database::getCacheSetOptions( $db );

			return self::recursiveFetchRefsFromDB( $title, $db );
		};

		$cacheOptions = array(
			'checkKeys' => array( $cacheKey ),
			'lockTSE' => 30,
		);

		return $wanCache->getWithSetCallback(
			$cacheKey,
			self::CACHE_DURATION_ONFETCH,
			$loadFromDb,
			$cacheOptions
		);
	}

	// storage of references data is disabled
	return false;
}
/**
* Reconstructs compressed json by successively retrieving the properties references-1, -2, etc
* It attempts the next step when a decoding error occurs.
* Returns json_decoded uncompressed string, with validation of json
*
* @param Title $title
* @param DatabaseBase $dbr
* @param string $string
* @param int $i
* @return array|false
*/
private static function recursiveFetchRefsFromDB( Title $title, DatabaseBase $dbr,
	$string = '', $i = 1 ) {
	$id = $title->getArticleID();

	// read the chunk stored under references-$i for this page
	$conds = array(
		'pp_page' => $id,
		'pp_propname' => 'references-' . $i
	);
	$chunk = $dbr->selectField( 'page_props', 'pp_value', $conds, __METHOD__ );

	if ( $chunk === false ) {
		// no refs stored in page_props at this index
		if ( $i > 1 ) {
			// shouldn't happen
			wfDebug( "Failed to retrieve stored references for title id $id" );
		}
		return false;
	}

	// append the chunk to what was gathered so far and try to decode the whole
	$string .= $chunk;
	$inflated = gzdecode( $string );
	if ( $inflated !== false ) {
		$decoded = json_decode( $inflated, true );
		if ( json_last_error() === JSON_ERROR_NONE ) {
			return $decoded;
		}
		// corrupted json ?
		// shouldn't happen since when string is truncated, gzdecode should fail
		wfDebug( "Corrupted json detected when retrieving stored references for title id $id" );
	}

	// decoding did not succeed yet, try again with the next references- property value
	return self::recursiveFetchRefsFromDB( $title, $dbr, $string, $i + 1 );
}
/**#@-*/
}

14
README.md Normal file
View file

@ -0,0 +1,14 @@
Cite
=============
The Cite extension provides a way for users to create references as footnotes to articles.
See https://www.mediawiki.org/wiki/Extension:Cite for detailed documentation.
Configuration
-------------
* `$wgCiteStoreReferencesData`: If set to true, references are saved in the database so that
other extensions can retrieve them independently of the main article content.
* `$wgCiteCacheReferencesDataOnParse`: (`$wgCiteStoreReferencesData` required) By default,
references are cached only on database access. If set to true, references are also cached
whenever pages are parsed.

View file

@ -27,6 +27,15 @@
],
"ResourceLoaderTestModules": [
"CiteHooks::onResourceLoaderTestModules"
],
"LinksUpdateConstructed": [
"CiteHooks::onLinksUpdateConstructed"
],
"LinksUpdate": [
"CiteHooks::onLinksUpdate"
],
"LinksUpdateComplete": [
"CiteHooks::onLinksUpdateComplete"
]
},
"ResourceModules": {
@ -160,7 +169,9 @@
],
"config": {
"AllowCiteGroups": true,
"CiteCacheReferences": false
"CiteCacheReferences": false,
"CiteStoreReferencesData": false,
"CiteCacheReferencesDataOnParse": false
},
"AutoloadClasses": {
"Cite": "Cite_body.php",