http://example.com/
	 * and return an object that can produce rendered output.
	 *
	 * @param $url String: URL of the feed; stored and later used by fetch()
	 * @param $args Array: argument array; the keys read here are 'max',
	 *   'reverse', 'date', 'highlight', 'filter', 'filterout',
	 *   'item-max-length', 'template' and 'templatename'
	 */
	public function __construct( $url, $args ) {
		global $wgRSSDateDefaultFormat, $wgRSSItemMaxLength;

		$this->url = $url;

		# Get max number of headlines from argument-array
		if ( isset( $args['max'] ) ) {
			$this->maxheads = $args['max'];
		}

		# Get reverse flag from argument array
		# (the mere presence of the key enables it; its value is not inspected)
		if ( isset( $args['reverse'] ) ) {
			$this->reversed = true;
		}

		# Get date format from argument array
		# or use a default value
		if ( isset( $args['date'] ) ) {
			$this->date = $args['date'];
		} elseif ( isset( $wgRSSDateDefaultFormat ) ) {
			$this->date = $wgRSSDateDefaultFormat;
		}

		# Get highlight terms from argument array
		if ( isset( $args['highlight'] ) ) {
			# NOTE(review): the original comment here says the terms are mapped
			# to lowercase so a later regex can be case insensitive;
			# explodeOnSpaces() is not visible in this part of the file -- confirm.
			$this->highlight = self::explodeOnSpaces( $args['highlight'] );
		}

		# Get filter terms from argument array
		if ( isset( $args['filter'] ) ) {
			$this->filter = self::explodeOnSpaces( $args['filter'] );
		}

		# Get a maximal length for item texts
		if ( isset( $args['item-max-length'] ) ) {
			$this->ItemMaxLength = $args['item-max-length'];
		} elseif ( is_numeric( $wgRSSItemMaxLength ) ) {
			$this->ItemMaxLength = $wgRSSItemMaxLength;
		}

		if ( isset( $args['filterout'] ) ) {
			$this->filterOut = self::explodeOnSpaces( $args['filterout'] );
		}

		// 'template' is the pagename of a user's itemTemplate including
		// a further pagename for the feedTemplate
		// In that way everything is handled via these two pages
		// and no default pages or templates are used.

		// 'templatename' is an optional pagename of a user's feedTemplate
		// In that way it substitutes $1 (default: RSSPost) in MediaWiki:Rss-item

		// Title::newFromText() returns null for an invalid title. Passing that
		// null on to Article would be a fatal error, so an unusable 'template'
		// value falls through to the default handling below instead.
		$itemTemplateTitleObject = null;
		if ( isset( $args['template'] ) ) {
			$itemTemplateTitleObject = Title::newFromText( $args['template'], NS_TEMPLATE );
		}

		if ( $itemTemplateTitleObject !== null ) {
			$itemTemplateArticleObject = new Article( $itemTemplateTitleObject, 0 );
			$this->itemTemplate = $itemTemplateArticleObject->fetchContent();
		} else {
			if ( isset( $args['templatename'] ) ) {
				$feedTemplatePagename = $args['templatename'];
			} else {
				// compatibility patch for rss extension
				$feedTemplatePagename = 'RSSPost';
				$feedTemplateTitleObject = Title::newFromText( $feedTemplatePagename, NS_TEMPLATE );

				if ( !$feedTemplateTitleObject->exists() ) {
					$feedTemplatePagename = Title::makeTitleSafe( NS_MEDIAWIKI, 'Rss-feed' );
				}
			}

			// MediaWiki:Rss-item = {{ feedTemplatePagename | title = {{{title}}} | ... }}

			// if the attribute parameter templatename= is not present
			// then it defaults to
			// {{ Template:RSSPost | title = {{{title}}} | ... }} - if Template:RSSPost exists from pre-1.9 versions
			// {{ MediaWiki:Rss-feed | title = {{{title}}} | ... }} - otherwise

			$this->itemTemplate = wfMsgNoTrans( 'rss-item', $feedTemplatePagename );
		}
	}

	/**
	 * Return RSS object for the given URL, maintaining caching.
	 *
	 * NOTES ON RETRIEVING REMOTE FILES:
	 * No attempt will be made to fetch remote files if there is something in cache.
	 *
	 * NOTES ON FAILED REQUESTS:
	 * If there is an HTTP error while fetching an RSS object, the cached version
	 * will be returned, if it exists.
	 *
	 * @return Status object
	 */
	public function fetch() {
		if ( !isset( $this->url ) ) {
			return Status::newFatal( 'rss-fetch-nourl' );
		}

		// Flow
		// 1. check cache
		// 2. if there is a hit, make sure its fresh
		// 3. if cached obj fails freshness check, fetch remote
		// 4. if remote fails, return stale object, or error
		$key = wfMemcKey( 'rss', $this->url );
		$cachedFeed = $this->loadFromCache( $key );
		if ( $cachedFeed !== false ) {
			wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );
			return Status::newGood();
		}
		wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url . ' from remote.' );

		// loadFromCache() keeps a stale-but-valid feed in $this->rss even when
		// it reports a miss. Snapshot it now so that a failed remote request
		// (which may overwrite $this->rss) can still fall back to it.
		$staleFeed = isset( $this->rss ) ? $this->rss : null;

		$status = $this->fetchRemote( $key );
		if ( !$status->isGood() && $staleFeed !== null ) {
			wfDebugLog( 'RSS', 'Remote fetch failed, outputting stale cached feed for ' . $this->url );
			$this->rss = $staleFeed;
			return Status::newGood();
		}

		return $status;
	}

	/**
	 * Retrieve the URL from the cache
	 *
	 * On any entry that contains a feed with items, the etag, last-modified
	 * value and feed object are stored on this instance -- even when the entry
	 * is then judged too old and false is returned -- so that a following
	 * remote request can send them along.
	 *
	 * @param $key String: lookup key to associate with this item
	 * @return boolean true if a sufficiently fresh feed was loaded, false if
	 *   the entry is missing, malformed or older than $wgRSSCacheCompare seconds
	 */
	protected function loadFromCache( $key ) {
		global $wgMemc, $wgRSSCacheCompare;

		$data = $wgMemc->get( $key );
		// storeInCache() writes a three element list; anything shorter cannot
		// be unpacked below without notices, so treat it as a miss.
		if ( !is_array( $data ) || count( $data ) < 3 ) {
			return false;
		}

		list( $etag, $lastModified, $rss ) = $data;

		if ( !isset( $rss->items ) ) {
			return false;
		}

		wfDebugLog( 'RSS', "Got '$key' from cache" );

		# Now that we've verified that we got useful data, keep it around.
		$this->rss = $rss;
		$this->etag = $etag;
		$this->lastModified = $lastModified;

		// We only care if $wgRSSCacheCompare is > 0
		if ( $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $lastModified ) {
			wfDebugLog( 'RSS', 'Content is old enough that we need to check cached content' );
			return false;
		}

		return true;
	}

	/**
	 * Store these objects (i.e. etag, lastModified, and RSS) in the cache.
	 *
	 * @param $key String: lookup key to associate with this item
	 * @return boolean false if there is no feed object to store, true
	 *   otherwise (the result of the cache write itself is only logged)
	 */
	protected function storeInCache( $key ) {
		global $wgMemc, $wgRSSCacheAge;

		if ( !isset( $this->rss ) ) {
			return false;
		}

		$r = $wgMemc->set(
			$key,
			array( $this->etag, $this->lastModified, $this->rss ),
			$wgRSSCacheAge
		);

		wfDebugLog( 'RSS', "Stored '$key' as in cache? $r");
		return true;
	}

	/**
	 * Retrieve a feed.
	 *
	 * Sends If-None-Match / If-Modified-Since when an etag / last-modified
	 * value is known on this instance, keeps the request object in
	 * $this->client, and hands a successful response to responseToXML().
	 *
	 * @param $key String: cache key, passed on to responseToXML()
	 * @param $headers Array: headers to send along with the request
	 * @return Status object: the failed request status, or whatever
	 *   responseToXML() returns
	 */
	protected function fetchRemote( $key, array $headers = array() ) {
		global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
			$wgRSSUrlNumberOfAllowedRedirects;

		if ( $this->etag ) {
			wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
			$headers['If-None-Match'] = $this->etag;
		}
		if ( $this->lastModified ) {
			// An HTTP-date has to be in IMF-fixdate form and end in "GMT";
			// gmdate( 'r' ) would end in "+0000", which is not a valid HTTP-date.
			$lm = gmdate( 'D, d M Y H:i:s', $this->lastModified ) . ' GMT';
			wfDebugLog( 'RSS', "Used last modified: $lm" );
			$headers['If-Modified-Since'] = $lm;
		}

		/**
		 * 'noProxy' can conditionally be set as shown in the commented
		 * example below; in HttpRequest 'noProxy' takes precedence over
		 * any value of 'proxy' and disables the use of a proxy.
		 *
		 * This is useful if you run the wiki in an intranet and need to
		 * access external feed urls through a proxy but internal feed
		 * urls must be accessed without a proxy.
		 *
		 * The general handling of such cases will be subject of a
		 * forthcoming version.
		 */

		$url = $this->url;
		$noProxy = !isset( $wgRSSProxy );

		// Example for disabling proxy use for certain urls
		// $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );

		/**
		 * Copied from HttpFunctions.php
		 * Perform an HTTP request
		 *
		 * @param $method String: HTTP method. Usually GET/POST
		 * @param $url String: full URL to act on. If protocol-relative, will be expanded to an http:// URL
		 * @param $options Array: options to pass to MWHttpRequest object.
		 *	Possible keys for the array:
		 *    - timeout             Timeout length in seconds
		 *    - postData            An array of key-value pairs or a url-encoded form data
		 *    - proxy               The proxy to use.
		 *                          Otherwise it will use $wgHTTPProxy (if set)
		 *                          Otherwise it will use the environment variable "http_proxy" (if set)
		 *    - noProxy             Don't use any proxy at all. Takes precedence over proxy value(s).
		 *    - sslVerifyHost       (curl only) Verify hostname against certificate
		 *    - sslVerifyCert       (curl only) Verify SSL certificate
		 *    - caInfo              (curl only) Provide CA information
		 *    - maxRedirects        Maximum number of redirects to follow (defaults to 5)
		 *    - followRedirects     Whether to follow redirects (defaults to false).
		 *		                    Note: this should only be used when the target URL is trusted,
		 *		                    to avoid attacks on intranet services accessible by HTTP.
		 *    - userAgent           A user agent, if you want to override the default
		 *                          MediaWiki/$wgVersion
		 * @return Mixed: (bool)false on failure or a string on success
		 */

		if ( isset( $wgRSSUrlNumberOfAllowedRedirects )
			&& is_numeric( $wgRSSUrlNumberOfAllowedRedirects )
		) {
			$maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
		} else {
			$maxRedirects = 0;
		}

		// we set followRedirects intentionally to true to see error messages
		// in cases where the maximum number of redirects is reached
		$client = HttpRequest::factory( $url, array(
			'timeout'         => $wgRSSFetchTimeout,
			'followRedirects' => true,
			'maxRedirects'    => $maxRedirects,
			'proxy'           => $wgRSSProxy,
			'noProxy'         => $noProxy,
			'userAgent'       => $wgRSSUserAgent,
		) );

		foreach ( $headers as $header => $value ) {
			$client->setHeader( $header, $value );
		}

		$fetch = $client->execute();
		$this->client = $client;

		if ( !$fetch->isGood() ) {
			// wfDebugLog() takes the log group first; wfDebug() would have
			// logged the literal text 'RSS' and dropped the actual message.
			wfDebugLog( 'RSS', 'Request Failed: ' . $fetch->getWikiText() );
			return $fetch;
		}

		$ret = $this->responseToXML( $key );
		return $ret;
	}

	/**
	 * Parse a piece of wikitext with a separate, newly created Parser.
	 *
	 * The parse runs against the global $wgTitle with parser options derived
	 * from the global $wgUser.
	 *
	 * @param $wikiText String: wikitext to parse
	 * @return String: the text of the resulting parser output
	 */
	function sandboxParse($wikiText) {
		global $wgTitle, $wgUser;

		$myParser = new Parser();
		$myParserOptions = ParserOptions::newFromUser($wgUser);
		$result = $myParser->parse($wikiText, $wgTitle, $myParserOptions);

		return $result->getText();
	}

	/**
	 * Render the entire feed so that each item is passed to the
	 * template which the MediaWiki then displays.
	 *
	 * Items are taken in feed order (or reverse order when the reverse flag is
	 * set), skipped when canDisplay() rejects them, and limited to
	 * $this->maxheads rendered items when that is greater than 0. The feed
	 * object itself is left untouched, so repeated calls give the same result.
	 *
	 * @param $parser the parser param to pass to recursiveTagParse()
	 * @param $frame the frame param to pass to recursiveTagParse()
	 * @return String: the parsed feed, or '' if template, parser or frame is missing
	 */
	public function renderFeed( $parser, $frame ) {
		$renderedFeed = '';

		if ( isset( $this->itemTemplate ) && isset( $parser ) && isset( $frame ) ) {
			$headcnt = 0;

			// Work on a local copy: reversing $this->rss->items in place
			// would flip the order again on every further call.
			$items = isset( $this->rss->items ) ? $this->rss->items : array();
			if ( $this->reversed ) {
				$items = array_reverse( $items );
			}

			foreach ( $items as $item ) {
				if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
					// Limit reached; nothing further can be added.
					break;
				}

				if ( $this->canDisplay( $item ) ) {
					$renderedFeed .= $this->renderItem( $item, $parser ) . "\n";
					$headcnt++;
				}
			}

			$renderedFeed = $this->sandboxParse( $renderedFeed );
		}

		// Same guard as above: without a parser there is nothing to tag.
		if ( isset( $parser ) ) {
			$parser->addTrackingCategory( 'rss-tracking-category' );
		}

		return $renderedFeed;
	}

	/**
	 * Render each item, filtering it out if necessary, applying any highlighting.
	 *
	 * @param $item Array: an array produced by RSSData where keys are the names of the RSS elements
	 */
	protected function renderItem( $item, $parser ) {
		$renderedItem = $this->itemTemplate;

		// $info will only be an XML element name, so we're safe using it.
		// $item[$info] is handled by the XML parser --
		// and that means bad RSS with stuff like
		// will find its
		// rogue