http://example.com/
	 * and return an object that can produce rendered output.
	 * @param string $url
	 * @param array $args
	 */
	public function __construct( $url, $args ) {
		global $wgRSSDateDefaultFormat, $wgRSSItemMaxLength;

		$services = MediaWikiServices::getInstance();

		$this->url = $url;

		// Quote marks to prevent XSS (T307028)
		$this->markerString = "'\"" . wfRandomString( 32 );
		$this->stripItems = [];
		$this->cache = $services->getMainWANObjectCache();
		$this->parserFactory = $services->getParserFactory();

		# Get max number of headlines from argument-array
		if ( isset( $args['max'] ) ) {
			$this->maxheads = $args['max'];
		}

		# Get reverse flag from argument array
		# (the mere presence of the key enables it; its value is not inspected)
		if ( isset( $args['reverse'] ) ) {
			$this->reversed = true;
		}

		# Get date format from argument array
		# or use a default value
		# @todo FIXME: not used yet
		if ( isset( $args['date'] ) ) {
			$this->date = $args['date'];
		} elseif ( isset( $wgRSSDateDefaultFormat ) ) {
			$this->date = $wgRSSDateDefaultFormat;
		}

		# Get highlight terms from argument array
		if ( isset( $args['highlight'] ) ) {
			# mapping to lowercase here so the regex can be case insensitive below.
			$this->highlight = self::explodeOnSpaces( $args['highlight'] );
		}

		# Get filter terms from argument array
		if ( isset( $args['filter'] ) ) {
			$this->filter = self::explodeOnSpaces( $args['filter'] );
		}

		# Get a maximal length for item texts
		if ( isset( $args['item-max-length'] ) ) {
			$this->ItemMaxLength = $args['item-max-length'];
		} elseif ( is_numeric( $wgRSSItemMaxLength ) ) {
			$this->ItemMaxLength = $wgRSSItemMaxLength;
		}

		# Get terms whose presence excludes an item from argument array
		if ( isset( $args['filterout'] ) ) {
			$this->filterOut = self::explodeOnSpaces( $args['filterout'] );
		}

		// 'template' is the pagename of a user's itemTemplate including
		// a further pagename for the feedTemplate
		// In that way everything is handled via these two pages
		// and no default pages or templates are used.

		// 'templatename' is an optional pagename of a user's feedTemplate
		// In that way it substitutes $1 (default: RSSPost) in MediaWiki:Rss-item

		if ( isset( $args['template'] ) ) {
			// Title::newFromText() yields null for text that is not a valid title,
			// so it must be checked before being dereferenced. When the page is
			// missing or invalid, itemTemplate is simply left unset.
			$itemTemplateTitleObject = Title::newFromText( $args['template'], NS_TEMPLATE );

			if ( $itemTemplateTitleObject !== null && $itemTemplateTitleObject->exists() ) {
				$itemTemplatePageObject = $services
					->getWikiPageFactory()
					->newFromTitle( $itemTemplateTitleObject );
				$itemTemplateContentObject = $itemTemplatePageObject->getContent();

				// Only textual content can serve as a template body.
				if ( $itemTemplateContentObject instanceof TextContent ) {
					$this->itemTemplate = $itemTemplateContentObject->getText();
				}
			}
		} else {
			if ( isset( $args['templatename'] ) ) {
				$feedTemplatePagename = $args['templatename'];
			} else {
				// compatibility patch for rss extension
				$feedTemplatePagename = 'RSSPost';
				$feedTemplateTitleObject = Title::newFromText( $feedTemplatePagename, NS_TEMPLATE );

				if ( !$feedTemplateTitleObject->exists() ) {
					$feedTemplatePagename = Title::makeTitleSafe( NS_MEDIAWIKI, 'Rss-feed' );
				}
			}

			// MediaWiki:Rss-item = {{ feedTemplatePagename | title = {{{title}}} | ... }}

			// if the attribute parameter templatename= is not present
			// then it defaults to
			// {{ Template:RSSPost | title = {{{title}}} | ... }}
			// - if Template:RSSPost exists from pre-1.9 versions
			// {{ MediaWiki:Rss-feed | title = {{{title}}} | ... }}
			// - otherwise

			$this->itemTemplate = wfMessage( 'rss-item', $feedTemplatePagename )->plain();
		}
	}

	/**
	 * Remember a value and hand back a placeholder marker that stands in for it.
	 *
	 * The value is appended to $this->stripItems; sandboxParse() later swaps
	 * each marker for the stored value at the index embedded in the marker.
	 *
	 * @param mixed $item Value to store
	 * @return string Marker of the form "{markerString}-{index}-{markerString}"
	 */
	private function insertStripItem( $item ) {
		$this->stripItems[] = $item;
		// Index of the element that was just appended.
		$itemIndex = count( $this->stripItems ) - 1;
		return "{$this->markerString}-{$itemIndex}-{$this->markerString}";
	}

	/**
	 * Return RSS object for the given URL, maintaining caching.
	 *
	 * NOTES ON RETRIEVING REMOTE FILES:
	 * No attempt will be made to fetch remote files if there is something in cache.
	 *
	 * NOTES ON FAILED REQUESTS:
	 * If there is an HTTP error while fetching an RSS object, the cached version
	 * will be returned, if it exists.
* * @return Status object */ public function fetch() { if ( !isset( $this->url ) ) { return Status::newFatal( 'rss-fetch-nourl' ); } // Flow // 1. check cache // 2. if there is a hit, make sure its fresh // 3. if cached obj fails freshness check, fetch remote // 4. if remote fails, return stale object, or error $key = $this->cache->makeKey( 'rss-fetch', $this->url ); $cachedFeed = $this->loadFromCache( $key ); if ( $cachedFeed !== false ) { wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url ); return Status::newGood(); } wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url . ' from remote.' ); return $this->fetchRemote( $key ); } /** * Retrieve the URL from the cache * @param string $key lookup key to associate with this item * @return bool */ protected function loadFromCache( $key ) { global $wgRSSCacheCompare; $data = $this->cache->get( $key ); if ( !is_array( $data ) ) { return false; } [ $etag, $lastModified, $rss ] = $data; if ( !isset( $rss->items ) ) { return false; } wfDebugLog( 'RSS', "Got '$key' from cache" ); # Now that we've verified that we got useful data, keep it around. $this->rss = $rss; $this->etag = $etag; $this->lastModified = $lastModified; // We only care if $wgRSSCacheCompare is > 0 if ( $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $lastModified ) { wfDebugLog( 'RSS', 'Content is old enough that we need to check cached content' ); return false; } return true; } /** * Store these objects (i.e. etag, lastModified, and RSS) in the cache. * @param string $key lookup key to associate with this item * @return bool */ protected function storeInCache( $key ) { global $wgRSSCacheAge; if ( !isset( $this->rss ) ) { return false; } $this->cache->set( $key, [ $this->etag, $this->lastModified, $this->rss ], $wgRSSCacheAge ); wfDebugLog( 'RSS', "Stored '$key' as in cache" ); return true; } /** * Retrieve a feed. 
*
	 * Sends a conditional request (If-None-Match / If-Modified-Since) when an
	 * etag or last-modified timestamp is already known on this object.
	 *
	 * @param string $key Cache key
	 * @param array $headers headers to send along with the request
	 * @return Status object
	 */
	protected function fetchRemote( $key, array $headers = [] ) {
		global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
			$wgRSSUrlNumberOfAllowedRedirects;

		if ( $this->etag ) {
			wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
			$headers['If-None-Match'] = $this->etag;
		}

		if ( $this->lastModified ) {
			// HTTP requires the IMF-fixdate form with a literal "GMT" zone;
			// gmdate( 'r' ) would produce "+0000", which is not a valid HTTP-date.
			$lastModified = gmdate( 'D, d M Y H:i:s', $this->lastModified ) . ' GMT';
			wfDebugLog( 'RSS', "Used last modified: $lastModified" );
			$headers['If-Modified-Since'] = $lastModified;
		}

		/**
		 * 'noProxy' can conditionally be set as shown in the commented
		 * example below; in HttpRequest 'noProxy' takes precedence over
		 * any value of 'proxy' and disables the use of a proxy.
		 *
		 * This is useful if you run the wiki in an intranet and need to
		 * access external feed urls through a proxy but internal feed
		 * urls must be accessed without a proxy.
		 *
		 * The general handling of such cases will be subject of a
		 * forthcoming version.
		 */

		$url = $this->url;
		$noProxy = !isset( $wgRSSProxy );

		// Example for disabling proxy use for certain urls
		// $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );

		if ( isset( $wgRSSUrlNumberOfAllowedRedirects )
			&& is_numeric( $wgRSSUrlNumberOfAllowedRedirects )
		) {
			$maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
		} else {
			$maxRedirects = 0;
		}

		// we set followRedirects intentionally to true to see error messages
		// in cases where the maximum number of redirects is reached
		$client = MediaWikiServices::getInstance()->getHttpRequestFactory()->create(
			$url,
			[
				'timeout' => $wgRSSFetchTimeout,
				'followRedirects' => true,
				'maxRedirects' => $maxRedirects,
				'proxy' => $wgRSSProxy,
				'noProxy' => $noProxy,
				'userAgent' => $wgRSSUserAgent,
			],
			__METHOD__
		);

		foreach ( $headers as $header => $value ) {
			$client->setHeader( $header, $value );
		}

		$fetch = $client->execute();
		// Keep the client around so the response can be read from it later.
		$this->client = $client;

		if ( !$fetch->isGood() ) {
			// Log to the 'RSS' channel like every other message in this class.
			// wfDebug() takes the message text as its first argument, so passing
			// 'RSS' there would log the channel name instead of the error.
			wfDebugLog( 'RSS', 'Request Failed: ' . Status::wrap( $fetch )->getWikitext() );

			return $fetch;
		}

		return $this->responseToXML( $key );
	}

	/**
	 * Parse wikitext with a separate parser instance, then substitute the
	 * placeholder markers created by insertStripItem() with their stored values.
	 *
	 * @see https://bugzilla.wikimedia.org/show_bug.cgi?id=34763
	 * @param string $wikiText
	 * @param Parser $origParser Supplies the title and options for the parse
	 * @return string
	 */
	protected function sandboxParse( $wikiText, $origParser ) {
		$myParser = $this->parserFactory->getInstance();
		$result = $myParser->parse(
			$wikiText,
			$origParser->getTitle(),
			$origParser->getOptions()
		);

		$stripItems = $this->stripItems;
		// Quote the marker so it is always matched literally, whatever
		// characters it happens to contain.
		$marker = preg_quote( $this->markerString, '/' );

		return preg_replace_callback(
			"/{$marker}-(\d+)-{$marker}/",
			static function ( array $matches ) use ( $stripItems ) {
				$markerIndex = (int)$matches[1];
				return $stripItems[$markerIndex];
			},
			$result->getText()
		);
	}

	/**
	 * Render the entire feed so that each item is passed to the
	 * template which the MediaWiki then displays.
* * @param Parser $parser * @param PPFrame $frame The frame param to pass to recursiveTagParse() * @return string */ public function renderFeed( $parser, $frame ) { $renderedFeed = ''; if ( isset( $this->itemTemplate ) && isset( $parser ) && isset( $frame ) ) { $headcnt = 0; if ( $this->reversed ) { $this->rss->items = array_reverse( $this->rss->items ); } foreach ( $this->rss->items as $item ) { if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) { continue; } if ( $this->canDisplay( $item ) ) { $renderedFeed .= $this->renderItem( $item, $parser ) . "\n"; $headcnt++; } } $renderedFeed = $this->sandboxParse( $renderedFeed, $parser ); } $parser->addTrackingCategory( 'rss-tracking-category' ); return $renderedFeed; } /** * Render each item, filtering it out if necessary, applying any highlighting. * * @param array $item an array produced by RSSData where keys are the names of the RSS elements * @param Parser $parser * @return mixed */ protected function renderItem( $item, $parser ) { $renderedItem = $this->itemTemplate; // $info will only be an XML element name, so we're safe using it. // $item[$info] is handled by the XML parser -- // and that means bad RSS with stuff like // will find its // rogue