diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 14bd3ba..a2b1540 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -2,6 +2,8 @@ RELEASE NOTES of the MediaWiki extension RSS http://www.mediawiki.org/wiki/Extension:RSS === TO DO === +* bug 30377 add a new parameter to limit the number of characters when rendering + the channel item * set an upper default limit for HttpRequest request size when fetching feeds doing a HEAD request first to ask for the size but that value may not be available. Check how much data is returned as its coming back @@ -10,56 +12,8 @@ http://www.mediawiki.org/wiki/Extension:RSS coming in. Then you could abort cleanly once it's gotten too much (otherwise using the defaults - PHP will abort the entire program when your memory usage gets too high) - -=== Version 2.12 2012-03-07 === -* bug fix 34763 "RSS feed items (HTML) are not rendered as HTML but htmlescaped" -* regression bug 30377 "Add a new parameter to limit the number of characters - when rendering the channel item ". Feed item string length - limitation is difficult when we allow HTML or tags, because a mere - content-unaware limitation breaks (can break) tags which results in disastrous - rendering results. - -=== Version 2.11 2012-02-29 === -* function name typo correction - -=== Version 2.10 2012-02-27 === -* final solution of bug 30028 "Error parsing XML for RSS" - improve and harden - Extension:RSS when parsing differently flavoured RSS feeds and ATOM feeds -* new parameter $wgRSSUrlNumberOfAllowedRedirects (default = 0) - Some feed urls redirect. The new RSS version can deal with redirects, - but it must be expressly enabled. For example, you can set - $wgRSSUrlNumberOfAllowedRedirects = 1; - -=== Version 2.01 2012-02-24 === -* "summary" element of ATOM feed items are shown - which is handled like "description" element of RSS -* handling of basic HTML layout tags


in item description - -=== Version 2.00 2012-02-24 === -* first version which can parse RSS and at least some ATOM feeds - partial solution of bug 30028 "Error parsing XML for RSS" - improve and harden - Extension:RSS when parsing differently flavoured RSS feeds and ATOM feeds - -=== Version 1.94 2012-02-23 === -* changed white list definition and behaviour: - - 1. changed the name from $wgRSSAllowedFeeds to $wgRSSUrlWhitelist - 2. behaviour has been changed - - the new behaviour is: - $wgRSSUrlWhitelist is empty by default. Since version 1.94 it must be - expressly set to an array( list-of-comma-separated-allowed-RSS-urls-strings ) - or set to array( "*" ) if you want to allow any url - - the old behaviour was: - $wgRSSAllowedFeeds was empty by default and empty meant that every Url - was allowed by default. This has been changed, see new behaviour. - -=== Version 1.92 2012-02-13 === -* added optional date= attribute and $wgRSSDateDefaultFormat parameter -* added optional item-max-length= attribute and $wgRSSItemMaxLength parameter - fixes bug 30377 add a new parameter to limit the number of characters when - rendering the channel item +* bug 30028 "Error parsing XML for RSS" - improve and harden Extension:RSS when + parsing differently flavoured RSS feeds === Version 1.90 2011-08-15 === * removed parsing of each single channel subelement (item) diff --git a/RSS.i18n.php b/RSS.i18n.php index a7524b9..c627ec8 100644 --- a/RSS.i18n.php +++ b/RSS.i18n.php @@ -14,16 +14,13 @@ $messages = array(); $messages['en'] = array( 'rss-desc' => 'Displays RSS feeds on MediaWiki pages in a standard or in user-definable formats using template pages', - 'rss-tracking-category' => 'Pages with RSS feeds', 'rss-error' => 'Failed to load RSS feed from $1: $2', 'rss-empty' => 'Failed to load RSS feed from $1!', 'rss-fetch-nourl' => 'Fetch called without a URL!', 'rss-invalid-url' => 'Not a valid URL: $1', 'rss-parse-error' => 'Error parsing XML for RSS', 'rss-ns-permission' => 'RSS is not allowed in this namespace', - 'rss-url-is-not-whitelisted' => '"$1" is not in the whitelist of allowed feeds. {{PLURAL:$3|$2 is the only allowed feed|The allowed feeds are as follows: $2}}.', - 'rss-empty-whitelist' => '"$1" is not in the whitelist of allowed feeds. There are no allowed feed URLs in the whitelist.', - 'rss-deprecated-wgrssallowedfeeds-found' => 'The deprecated variable $wgRSSAllowedFeeds has been detected. Since RSS version 2.0 this variable has to be replaced by $wgRSSUrlWhitelist as described in the manual page Extension:RSS.', + 'rss-url-permission' => 'This URL is not allowed to be included', 'rss-item' => '{{$1 | title = {{{title}}} | link = {{{link}}} | date = {{{date}}} | author = {{{author}}} | description = {{{description}}} }}', 'rss-feed' => "; '''[{{{link}}} {{{title}}}]''' : {{{description}}} @@ -35,7 +32,6 @@ $messages['en'] = array( * @author Yekrats */ $messages['qqq'] = array( - 'rss-tracking-category' => 'The name of a category for all pages which use the <rss> parser extension tag. The category is automatically added unless the feature is disabled.', 'rss-invalid-url' => '$1 is the invalid URL for the RSS feed', 'rss-feed' => "; $1 : ''not to be localised'' diff --git a/RSS.php b/RSS.php index 9fdb7bd..1fe406d 100644 --- a/RSS.php +++ b/RSS.php @@ -4,7 +4,7 @@ * * @file * @ingroup Extensions - * @version 2.15 + * @version 1.90 * @author mutante, Daniel Kinzler, Rdb, Mafs, Thomas Gries, Alxndr, Chris Reigrut, K001 * @author Kellan Elliott-McCrea -- author of MagpieRSS * @author Jeroen De Dauw @@ -14,8 +14,6 @@ * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation */ -define( "EXTENSION_RSS_VERSION", "2.15 20120319" ); - if ( !defined( 'MEDIAWIKI' ) ) { die( "This is not a valid entry point.\n" ); } @@ -28,7 +26,7 @@ $wgExtensionCredits['parserhook'][] = array( 'Rdb', 'Mafs', 'Alxndr', 'Thomas Gries', 'Chris Reigrut', 'K001', 'Jack Phoenix', 'Jeroen De Dauw', 'Mark A. Hershberger' ), - 'version' => EXTENSION_RSS_VERSION, + 'version' => '1.90 20110815', 'url' => 'https://www.mediawiki.org/wiki/Extension:RSS', 'descriptionmsg' => 'rss-desc', ); @@ -38,63 +36,32 @@ $dir = dirname( __FILE__ ) . '/'; $wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php'; $wgAutoloadClasses['RSSHooks'] = $dir . 'RSSHooks.php'; $wgAutoloadClasses['RSSParser'] = $dir . 'RSSParser.php'; -$wgAutoloadClasses['RSSUtils'] = $dir . 'RSSParser.php'; $wgAutoloadClasses['RSSData'] = $dir . 'RSSData.php'; $wgHooks['ParserFirstCallInit'][] = 'RSSHooks::parserInit'; -// one hour -$wgRSSCacheAge = 3600; + // one hour + $wgRSSCacheAge = 3600; // Check cached content, if available, against remote. // $wgRSSCacheCompare should be set to false or a timeout // (less than $wgRSSCacheAge) after which a comparison will be made. -// for debugging set $wgRSSCacheCompare = 1; $wgRSSCacheCompare = false; -// 15 second timeout -$wgRSSFetchTimeout = 15; +// 5 second timeout +$wgRSSFetchTimeout = 5; // Ignore the RSS tag in all but the namespaces listed here. // null (the default) means the tag can be used anywhere. $wgRSSNamespaces = null; -// Whitelist of allowed RSS Urls -// -// If there are items in the array, and the user supplied URL is not in the array, -// the url will not be allowed -// -// Urls are case-sensitively tested against values in the array. -// They must exactly match including any trailing "/" character. -// -// Warning: Allowing all urls (not setting a whitelist) -// may be a security concern. -// -// an empty or non-existent array means: no whitelist defined -// this is the default: an empty whitelist. No servers are allowed by default. -$wgRSSUrlWhitelist = array(); - -// include "*" if you expressly want to allow all urls (you should not do this) -// $wgRSSUrlWhitelist = array( "*" ); - -// Maximum number of redirects to follow (defaults to 0) -// Note: this should only be used when the target URLs are trusted, -// to avoid attacks on intranet services accessible by HTTP. -$wgRSSUrlNumberOfAllowedRedirects = 0; +// URL whitelist of RSS Feeds: +// if there are items in the array, and the used URL isn't in the array, +// it will not be allowed (originally proposed in bug 27768) +$wgRSSAllowedFeeds = array(); // Agent to use for fetching feeds -$wgRSSUserAgent = "MediaWikiRSS/" . strtok( EXTENSION_RSS_VERSION, " " ) . " (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension"; +$wgRSSUserAgent = 'MediaWikiRSS/0.02 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension'; // Proxy server to use for fetching feeds $wgRSSProxy = false; - -// default date format of item publication dates see http://www.php.net/date -$wgRSSDateDefaultFormat = "(Y-m-d H:i:s)"; - -// limit the number of characters in the item description -// or set to false for unlimited length. -// THIS IS CURRENTLY NOT WORKING (bug 30377) -$wgRSSItemMaxLength = false; - -// You can choose to allow active links in feed items; default: false -$wgRSSAllowLinkTag = false; diff --git a/RSSData.php b/RSSData.php index 6e51a0c..de3fa20 100644 --- a/RSSData.php +++ b/RSSData.php @@ -15,13 +15,7 @@ class RSSData { return; } $xpath = new DOMXPath( $xml ); - - // namespace-safe method to find all elements - $items = $xpath->query( "//*[local-name() = 'item']" ); - - if ( $items->length == 0 ) { - $items = $xpath->query( "//*[local-name() = 'entry']" ); - } + $items = $xpath->query( '/rss/channel/item' ); if( $items->length !== 0 ) { foreach ( $items as $item ) { @@ -43,7 +37,7 @@ class RSSData { $this->items[] = $bit; } } else { - $this->error = 'No RSS//ATOM items found.'; + $this->error = 'No RSS items found.'; return; } } @@ -58,16 +52,18 @@ class RSSData { * @param $n String: name of the element we have * @return String Name to map it to */ - protected function rssTokenToName( $name ) { - switch( $name ) { + protected function rssTokenToName( $n ) { + switch( $n ) { case 'dc:date': - case 'pubDate': - case 'updated': return 'date'; + # parse "2010-10-18T18:07:00Z" + case 'pubDate': + return 'date'; + # parse RFC date case 'dc:creator': return 'author'; - case 'summary': - return 'description'; + case 'title': + return 'title'; case 'content:encoded': return 'encodedContent'; @@ -80,8 +76,9 @@ class RSSData { case 'comments': case 'category': return null; + default: - return $name; + return $n; } } } \ No newline at end of file diff --git a/RSSHooks.php b/RSSHooks.php index fe664b8..2a870e5 100644 --- a/RSSHooks.php +++ b/RSSHooks.php @@ -1,7 +1,6 @@ elements * @param $parser Parser Object @@ -20,51 +19,24 @@ class RSSHooks { * @param $parser Parser * @param $frame PPFrame parser context */ - static function renderRss( $input, array $args, Parser $parser, PPFrame $frame ) { - global $wgRSSCacheAge, $wgRSSCacheCompare, $wgRSSNamespaces, - $wgRSSUrlWhitelist,$wgRSSAllowedFeeds; + static function renderRss( $input, $args, $parser, $frame ) { + global $wgRSSCacheAge, $wgRSSCacheCompare, $wgRSSNamespaces, $wgRSSAllowedFeeds; if ( is_array( $wgRSSNamespaces ) && count( $wgRSSNamespaces ) ) { $ns = $parser->getTitle()->getNamespace(); $checkNS = array_flip( $wgRSSNamespaces ); if( !isset( $checkNS[$ns] ) ) { - return RSSUtils::RSSError( 'rss-ns-permission' ); + return wfMsg( 'rss-ns-permission' ); } } - switch ( true ) { - - case ( isset( $wgRSSAllowedFeeds ) ): - return RSSUtils::RSSError( 'rss-deprecated-wgrssallowedfeeds-found' ); - break; - - # disallow because there is no whitelist or empty whitelist - case ( !isset( $wgRSSUrlWhitelist ) - || !is_array( $wgRSSUrlWhitelist ) - || ( count( $wgRSSUrlWhitelist ) === 0 ) ): - return RSSUtils::RSSError( 'rss-empty-whitelist', - $input - ); - break; - - # allow - case ( in_array( "*", $wgRSSUrlWhitelist ) ): - case ( in_array( $input, $wgRSSUrlWhitelist ) ): - break; - - # otherwise disallow - case ( !in_array( $input, $wgRSSUrlWhitelist ) ): - default: - $listOfAllowed = $parser->getFunctionLang()->listToText( $wgRSSUrlWhitelist ); - $numberAllowed = $parser->getFunctionLang()->formatNum( count( $wgRSSUrlWhitelist ) ); - return RSSUtils::RSSError( 'rss-url-is-not-whitelisted', - array( $input, $listOfAllowed, $numberAllowed ) - ); + if ( count( $wgRSSAllowedFeeds ) && !in_array( $input, $wgRSSAllowedFeeds ) ) { + return wfMsg( 'rss-url-permission' ); } - + if ( !Http::isValidURI( $input ) ) { - return RSSUtils::RSSError( 'rss-invalid-url', htmlspecialchars( $input ) ); + return wfMsg( 'rss-invalid-url', htmlspecialchars( $input ) ); } if ( $wgRSSCacheCompare ) { $timeout = $wgRSSCacheCompare; @@ -84,10 +56,9 @@ class RSSHooks { } if ( !is_object( $rss->rss ) || !is_array( $rss->rss->items ) ) { - return RSSUtils::RSSError( 'rss-empty', htmlspecialchars( $input ) ); + return wfMsg( 'rss-empty', htmlspecialchars( $input ) ); } return $rss->renderFeed( $parser, $frame ); } - } diff --git a/RSSParser.php b/RSSParser.php index 205baf9..8c5eb7e 100644 --- a/RSSParser.php +++ b/RSSParser.php @@ -2,8 +2,6 @@ class RSSParser { protected $maxheads = 32; - protected $date = "Y-m-d H:i:s"; - protected $ItemMaxLength = 200; protected $reversed = false; protected $highlight = array(); protected $filter = array(); @@ -39,8 +37,6 @@ class RSSParser { * and return an object that can produce rendered output. */ function __construct( $url, $args ) { - global $wgRSSDateDefaultFormat,$wgRSSItemMaxLength; - $this->url = $url; # Get max number of headlines from argument-array @@ -54,13 +50,11 @@ class RSSParser { } # Get date format from argument array - # or use a default value + # FIXME: not used yet if ( isset( $args['date'] ) ) { $this->date = $args['date']; - } elseif ( isset( $wgRSSDateDefaultFormat ) ) { - $this->date = $wgRSSDateDefaultFormat; } - + # Get highlight terms from argument array if ( isset( $args['highlight'] ) ) { # mapping to lowercase here so the regex can be case insensitive below. @@ -72,13 +66,6 @@ class RSSParser { $this->filter = self::explodeOnSpaces( $args['filter'] ); } - # Get a maximal length for item texts - if ( isset( $args['item-max-length'] ) ) { - $this->ItemMaxLength = $args['item-max-length']; - } elseif ( is_numeric( $wgRSSItemMaxLength ) ) { - $this->ItemMaxLength = $wgRSSItemMaxLength; - } - if ( isset( $args['filterout'] ) ) { $this->filterOut = self::explodeOnSpaces( $args['filterout'] ); } @@ -218,8 +205,7 @@ class RSSParser { * @return Status object */ protected function fetchRemote( $key, array $headers = array()) { - global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy, - $wgRSSUrlNumberOfAllowedRedirects; + global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy; if ( $this->etag ) { wfDebugLog( 'RSS', 'Used etag: ' . $this->etag ); @@ -231,71 +217,12 @@ class RSSParser { $headers['If-Modified-Since'] = $lm; } - /** - * 'noProxy' can conditionally be set as shown in the commented - * example below; in HttpRequest 'noProxy' takes precedence over - * any value of 'proxy' and disables the use of a proxy. - * - * This is useful if you run the wiki in an intranet and need to - * access external feed urls through a proxy but internal feed - * urls must be accessed without a proxy. - * - * The general handling of such cases will be subject of a - * forthcoming version. - */ - - $url = $this->url; - $noProxy = !isset( $wgRSSProxy ); - - // Example for disabling proxy use for certain urls - // $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) ); - - /** - * Copied from HttpFunctions.php - * Perform an HTTP request - * - * @param $method String: HTTP method. Usually GET/POST - * @param $url String: full URL to act on. If protocol-relative, will be expanded to an http:// URL - * @param $options Array: options to pass to MWHttpRequest object. - * Possible keys for the array: - * - timeout Timeout length in seconds - * - postData An array of key-value pairs or a url-encoded form data - * - proxy The proxy to use. - * Otherwise it will use $wgHTTPProxy (if set) - * Otherwise it will use the environment variable "http_proxy" (if set) - * - noProxy Don't use any proxy at all. Takes precedence over proxy value(s). - * - sslVerifyHost (curl only) Verify hostname against certificate - * - sslVerifyCert (curl only) Verify SSL certificate - * - caInfo (curl only) Provide CA information - * - maxRedirects Maximum number of redirects to follow (defaults to 5) - * - followRedirects Whether to follow redirects (defaults to false). - * Note: this should only be used when the target URL is trusted, - * to avoid attacks on intranet services accessible by HTTP. - * - userAgent A user agent, if you want to override the default - * MediaWiki/$wgVersion - * @return Mixed: (bool)false on failure or a string on success - */ - - if ( isset( $wgRSSUrlNumberOfAllowedRedirects ) - && is_numeric( $wgRSSUrlNumberOfAllowedRedirects ) ) { - $maxRedirects = $wgRSSUrlNumberOfAllowedRedirects; - } else { - $maxRedirects = 0; - } - - // we set followRedirects intentionally to true to see error messages - // in cases where the maximum number of redirects is reached - $client = HttpRequest::factory( $url, - array( - 'timeout' => $wgRSSFetchTimeout, - 'followRedirects' => true, - 'maxRedirects' => $maxRedirects, - 'proxy' => $wgRSSProxy, - 'noProxy' => $noProxy, - 'userAgent' => $wgRSSUserAgent, - ) - ); + $client = HttpRequest::factory( $this->url, array( + 'timeout' => $wgRSSFetchTimeout, + 'proxy' => $wgRSSProxy + ) ); + $client->setUserAgent( $wgRSSUserAgent ); foreach ( $headers as $header => $value ) { $client->setHeader( $header, $value ); } @@ -312,14 +239,6 @@ class RSSParser { return $ret; } - function sandboxParse($wikiText) { - global $wgTitle, $wgUser; - $myParser = new Parser(); - $myParserOptions = ParserOptions::newFromUser($wgUser); - $result = $myParser->parse($wikiText, $wgTitle, $myParserOptions); - return $result->getText(); - } - /** * Render the entire feed so that each item is passed to the * template which the MediaWiki then displays. @@ -328,7 +247,7 @@ class RSSParser { * @param $frame the frame param to pass to recursiveTagParse() */ function renderFeed( $parser, $frame ) { - + $renderedFeed = ''; if ( isset( $this->itemTemplate ) && isset( $parser ) && isset( $frame ) ) { @@ -344,16 +263,15 @@ class RSSParser { } if ( $this->canDisplay( $item ) ) { - $renderedFeed .= $this->renderItem( $item, $parser ) . "\n"; + $renderedFeed .= $this->renderItem( $item ) . "\n"; $headcnt++; } } - $renderedFeed = $this->sandboxParse( $renderedFeed ); + $renderedFeed = $parser->recursiveTagParse( $renderedFeed, $frame ); - } - - $parser->addTrackingCategory( 'rss-tracking-category' ); + } + return $renderedFeed; } @@ -362,7 +280,7 @@ class RSSParser { * * @param $item Array: an array produced by RSSData where keys are the names of the RSS elements */ - protected function renderItem( $item, $parser ) { + protected function renderItem( $item ) { $renderedItem = $this->itemTemplate; @@ -371,38 +289,14 @@ class RSSParser { // and that means bad RSS with stuff like // will find its // rogue