http://example.com/
* and return an object that can produce rendered output.
*/
function __construct( $url, $args ) {
global $wgRSSDateDefaultFormat,$wgRSSItemMaxLength;
$this->url = $url;
# Get max number of headlines from argument-array
if ( isset( $args['max'] ) ) {
$this->maxheads = $args['max'];
}
# Get reverse flag from argument array
if ( isset( $args['reverse'] ) ) {
$this->reversed = true;
}
# Get date format from argument array
# or use a default value
if ( isset( $args['date'] ) ) {
$this->date = $args['date'];
} elseif ( isset( $wgRSSDateDefaultFormat ) ) {
$this->date = $wgRSSDateDefaultFormat;
}
# Get highlight terms from argument array
if ( isset( $args['highlight'] ) ) {
# mapping to lowercase here so the regex can be case insensitive below.
$this->highlight = self::explodeOnSpaces( $args['highlight'] );
}
# Get filter terms from argument array
if ( isset( $args['filter'] ) ) {
$this->filter = self::explodeOnSpaces( $args['filter'] );
}
# Get a maximal length for item texts
if ( isset( $args['item-max-length'] ) ) {
$this->ItemMaxLength = $args['item-max-length'];
} elseif ( is_numeric( $wgRSSItemMaxLength ) ) {
$this->ItemMaxLength = $wgRSSItemMaxLength;
}
if ( isset( $args['filterout'] ) ) {
$this->filterOut = self::explodeOnSpaces( $args['filterout'] );
}
// 'template' is the pagename of a user's itemTemplate including
// a further pagename for the feedTemplate
// In that way everything is handled via these two pages
// and no default pages or templates are used.
// 'templatename' is an optional pagename of a user's feedTemplate
// In that way it substitutes $1 (default: RSSPost) in MediaWiki:Rss-item
if ( isset( $args['template'] ) ) {
$itemTemplateTitleObject = Title::newFromText( $args['template'], NS_TEMPLATE );
$itemTemplateArticleObject = new Article( $itemTemplateTitleObject, 0 );
$this->itemTemplate = $itemTemplateArticleObject->fetchContent();
} else {
if ( isset( $args['templatename'] ) ) {
$feedTemplatePagename = $args['templatename'];
} else {
// compatibility patch for rss extension
$feedTemplatePagename = 'RSSPost';
$feedTemplateTitleObject = Title::newFromText( $feedTemplatePagename, NS_TEMPLATE );
if ( !$feedTemplateTitleObject->exists() ) {
$feedTemplatePagename = Title::makeTitleSafe( NS_MEDIAWIKI, 'Rss-feed' );
}
}
// MediaWiki:Rss-item = {{ feedTemplatePagename | title = {{{title}}} | ... }}
// if the attribute parameter templatename= is not present
// then it defaults to
// {{ Template:RSSPost | title = {{{title}}} | ... }} - if Template:RSSPost exists from pre-1.9 versions
// {{ MediaWiki:Rss-feed | title = {{{title}}} | ... }} - otherwise
$this->itemTemplate = wfMsgNoTrans( 'rss-item', $feedTemplatePagename );
}
}
/**
* Return RSS object for the given URL, maintaining caching.
*
* NOTES ON RETRIEVING REMOTE FILES:
* No attempt will be made to fetch remote files if there is something in cache.
*
* NOTES ON FAILED REQUESTS:
* If there is an HTTP error while fetching an RSS object, the cached version
* will be returned, if it exists.
*
* @return Status object
*/
function fetch() {
if ( !isset( $this->url ) ) {
return Status::newFatal( 'rss-fetch-nourl' );
}
// Flow
// 1. check cache
// 2. if there is a hit, make sure its fresh
// 3. if cached obj fails freshness check, fetch remote
// 4. if remote fails, return stale object, or error
$key = wfMemcKey( 'rss', $this->url );
$cachedFeed = $this->loadFromCache( $key );
if ( $cachedFeed !== false ) {
wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );
return Status::newGood();
}
wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url . ' from remote.' );
$status = $this->fetchRemote( $key );
return $status;
}
/**
* Retrieve the URL from the cache
* @param $key String: lookup key to associate with this item
* @return boolean
*/
protected function loadFromCache( $key ) {
global $wgMemc, $wgRSSCacheCompare;
$data = $wgMemc->get( $key );
if ( !is_array( $data ) ) {
return false;
}
list( $etag, $lastModified, $rss ) =
$data;
if ( !isset( $rss->items ) ) {
return false;
}
wfDebugLog( 'RSS', "Got '$key' from cache" );
# Now that we've verified that we got useful data, keep it around.
$this->rss = $rss;
$this->etag = $etag;
$this->lastModified = $lastModified;
// We only care if $wgRSSCacheCompare is > 0
if ( $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $lastModified ) {
wfDebugLog( 'RSS', 'Content is old enough that we need to check cached content' );
return false;
}
return true;
}
/**
* Store these objects (i.e. etag, lastModified, and RSS) in the cache.
* @param $key String: lookup key to associate with this item
* @return boolean
*/
protected function storeInCache( $key ) {
global $wgMemc, $wgRSSCacheAge;
if ( !isset( $this->rss ) ) {
return false;
}
$r = $wgMemc->set( $key,
array( $this->etag, $this->lastModified, $this->rss ),
$wgRSSCacheAge );
wfDebugLog( 'RSS', "Stored '$key' as in cache? $r");
return true;
}
/**
* Retrieve a feed.
* @param $key String:
* @param $headers Array: headers to send along with the request
* @return Status object
*/
protected function fetchRemote( $key, array $headers = array()) {
global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
$wgRSSUrlNumberOfAllowedRedirects;
if ( $this->etag ) {
wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
$headers['If-None-Match'] = $this->etag;
}
if ( $this->lastModified ) {
$lm = gmdate( 'r', $this->lastModified );
wfDebugLog( 'RSS', "Used last modified: $lm" );
$headers['If-Modified-Since'] = $lm;
}
/**
* 'noProxy' can conditionally be set as shown in the commented
* example below; in HttpRequest 'noProxy' takes precedence over
* any value of 'proxy' and disables the use of a proxy.
*
* This is useful if you run the wiki in an intranet and need to
* access external feed urls through a proxy but internal feed
* urls must be accessed without a proxy.
*
* The general handling of such cases will be subject of a
* forthcoming version.
*/
$url = $this->url;
$noProxy = !isset( $wgRSSProxy );
// Example for disabling proxy use for certain urls
// $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );
/**
* Copied from HttpFunctions.php
* Perform an HTTP request
*
* @param $method String: HTTP method. Usually GET/POST
* @param $url String: full URL to act on. If protocol-relative, will be expanded to an http:// URL
* @param $options Array: options to pass to MWHttpRequest object.
* Possible keys for the array:
* - timeout Timeout length in seconds
* - postData An array of key-value pairs or a url-encoded form data
* - proxy The proxy to use.
* Otherwise it will use $wgHTTPProxy (if set)
* Otherwise it will use the environment variable "http_proxy" (if set)
* - noProxy Don't use any proxy at all. Takes precedence over proxy value(s).
* - sslVerifyHost (curl only) Verify hostname against certificate
* - sslVerifyCert (curl only) Verify SSL certificate
* - caInfo (curl only) Provide CA information
* - maxRedirects Maximum number of redirects to follow (defaults to 5)
* - followRedirects Whether to follow redirects (defaults to false).
* Note: this should only be used when the target URL is trusted,
* to avoid attacks on intranet services accessible by HTTP.
* - userAgent A user agent, if you want to override the default
* MediaWiki/$wgVersion
* @return Mixed: (bool)false on failure or a string on success
*/
if ( isset( $wgRSSUrlNumberOfAllowedRedirects )
&& is_numeric( $wgRSSUrlNumberOfAllowedRedirects ) ) {
$maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
} else {
$maxRedirects = 0;
}
// we set followRedirects intentionally to true to see error messages
// in cases where the maximum number of redirects is reached
$client = HttpRequest::factory( $url,
array(
'timeout' => $wgRSSFetchTimeout,
'followRedirects' => true,
'maxRedirects' => $maxRedirects,
'proxy' => $wgRSSProxy,
'noProxy' => $noProxy,
'userAgent' => $wgRSSUserAgent,
)
);
foreach ( $headers as $header => $value ) {
$client->setHeader( $header, $value );
}
$fetch = $client->execute();
$this->client = $client;
if ( !$fetch->isGood() ) {
wfDebug( 'RSS', 'Request Failed: ' . $fetch->getWikiText() );
return $fetch;
}
$ret = $this->responseToXML( $key );
return $ret;
}
function sandboxParse($wikiText) {
global $wgTitle, $wgUser;
$myParser = new Parser();
$myParserOptions = ParserOptions::newFromUser($wgUser);
$result = $myParser->parse($wikiText, $wgTitle, $myParserOptions);
return $result->getText();
}
/**
* Render the entire feed so that each item is passed to the
* template which the MediaWiki then displays.
*
* @param $parser the parser param to pass to recursiveTagParse()
* @param $frame the frame param to pass to recursiveTagParse()
*/
function renderFeed( $parser, $frame ) {
$renderedFeed = '';
if ( isset( $this->itemTemplate ) && isset( $parser ) && isset( $frame ) ) {
$headcnt = 0;
if ( $this->reversed ) {
$this->rss->items = array_reverse( $this->rss->items );
}
foreach ( $this->rss->items as $item ) {
if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
continue;
}
if ( $this->canDisplay( $item ) ) {
$renderedFeed .= $this->renderItem( $item, $parser ) . "\n";
$headcnt++;
}
}
$renderedFeed = $this->sandboxParse( $renderedFeed );
}
$parser->addTrackingCategory( 'rss-tracking-category' );
return $renderedFeed;
}
/**
* Render each item, filtering it out if necessary, applying any highlighting.
*
* @param $item Array: an array produced by RSSData where keys are the names of the RSS elements
*/
protected function renderItem( $item, $parser ) {
$renderedItem = $this->itemTemplate;
// $info will only be an XML element name, so we're safe using it.
// $item[$info] is handled by the XML parser --
// and that means bad RSS with stuff like
// will find its
// rogue