mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/RSS
synced 2024-11-30 18:36:08 +00:00
Fix for ultra bug 30028. The RSS extension can now parse RSS and ATOM feeds of different flavours. The PHP XML DOM XPath query now uses a namespace-safe method to find all elements like item (RSS, RDF) or entry (ATOM). Also fixed a hidden problem when the feed URL was redirecting: this threw the "Cannot parse RSS for XML" error, which is now history. Introduced a new parameter $wgRSSUrlNumberOfAllowedRedirects, which defaults to zero, i.e. no redirects are allowed by default. See the manual page.
This commit is contained in:
parent
0811ae7d49
commit
896a9a3274
Notes:
Thomas Gries
2012-02-27 01:10:57 +00:00
|
@ -10,13 +10,20 @@ http://www.mediawiki.org/wiki/Extension:RSS
|
||||||
coming in. Then you could abort cleanly once it's gotten too much
|
coming in. Then you could abort cleanly once it's gotten too much
|
||||||
(otherwise using the defaults - PHP will abort the entire program when your
|
(otherwise using the defaults - PHP will abort the entire program when your
|
||||||
memory usage gets too high)
|
memory usage gets too high)
|
||||||
* bug 30028 "Error parsing XML for RSS" - improve and harden Extension:RSS when
|
|
||||||
parsing differently flavoured RSS feeds and ATOM feeds
|
=== Version 2.10 2012-02-27 ===
|
||||||
|
* final solution of bug 30028 "Error parsing XML for RSS" - improve and harden
|
||||||
|
Extension:RSS when parsing differently flavoured RSS feeds and ATOM feeds
|
||||||
|
* new parameter $wgRSSUrlNumberOfAllowedRedirects (default = 0)
|
||||||
|
Some feed urls redirect. The new RSS version can deal with redirects,
|
||||||
|
but it must be expressly enabled. For example, you can set
|
||||||
|
$wgRSSUrlNumberOfAllowedRedirects = 1;
|
||||||
|
|
||||||
=== Version 2.01 2012-02-24 ===
|
=== Version 2.01 2012-02-24 ===
|
||||||
* "summary" element of ATOM feed items is shown
|
* "summary" element of ATOM feed items is shown
|
||||||
which is handled like "description" element of RSS
|
which is handled like "description" element of RSS
|
||||||
* handling of basic HTML layout tags <p> <br> <b> <i> <u> <s> in item description
|
* handling of basic HTML layout tags <p> <br> <b> <i> <u> <s> in item description
|
||||||
|
|
||||||
=== Version 2.00 2012-02-24 ===
|
=== Version 2.00 2012-02-24 ===
|
||||||
* first version which can parse RSS and at least some ATOM feeds
|
* first version which can parse RSS and at least some ATOM feeds
|
||||||
partial solution of bug 30028 "Error parsing XML for RSS" - improve and harden
|
partial solution of bug 30028 "Error parsing XML for RSS" - improve and harden
|
||||||
|
|
|
@ -22,6 +22,7 @@ $messages['en'] = array(
|
||||||
'rss-ns-permission' => 'RSS is not allowed in this namespace',
|
'rss-ns-permission' => 'RSS is not allowed in this namespace',
|
||||||
'rss-url-is-not-whitelisted' => '"$1" is not in the whitelist of allowed feeds. {{PLURAL:$3|$2 is the only allowed feed|The allowed feeds are as follows: $2}}.',
|
'rss-url-is-not-whitelisted' => '"$1" is not in the whitelist of allowed feeds. {{PLURAL:$3|$2 is the only allowed feed|The allowed feeds are as follows: $2}}.',
|
||||||
'rss-empty-whitelist' => '"$1" is not in the whitelist of allowed feeds. There are no allowed feed URLs in the whitelist.',
|
'rss-empty-whitelist' => '"$1" is not in the whitelist of allowed feeds. There are no allowed feed URLs in the whitelist.',
|
||||||
|
'rss-deprecated-wgrssallowedfeeds-found' => 'The deprecated variable $wgRSSAllowedFeeds has been detected. Since RSS version 2.0 this variable has to be replaced by $wgRSSUrlWhitelist as described in the manual page Extension:RSS.',
|
||||||
'rss-item' => '{{$1 | title = {{{title}}} | link = {{{link}}} | date = {{{date}}} | author = {{{author}}} | description = {{{description}}} }}',
|
'rss-item' => '{{$1 | title = {{{title}}} | link = {{{link}}} | date = {{{date}}} | author = {{{author}}} | description = {{{description}}} }}',
|
||||||
'rss-feed' => "<!-- the following are two alternative templates. The first is the basic default template for feeds -->; '''<span class='plainlinks'>[{{{link}}} {{{title}}}]</span>'''
|
'rss-feed' => "<!-- the following are two alternative templates. The first is the basic default template for feeds -->; '''<span class='plainlinks'>[{{{link}}} {{{title}}}]</span>'''
|
||||||
: {{{description}}}
|
: {{{description}}}
|
||||||
|
|
13
RSS.php
13
RSS.php
|
@ -4,7 +4,7 @@
|
||||||
*
|
*
|
||||||
* @file
|
* @file
|
||||||
* @ingroup Extensions
|
* @ingroup Extensions
|
||||||
* @version 2.01
|
* @version 2.10
|
||||||
* @author mutante, Daniel Kinzler, Rdb, Mafs, Thomas Gries, Alxndr, Chris Reigrut, K001
|
* @author mutante, Daniel Kinzler, Rdb, Mafs, Thomas Gries, Alxndr, Chris Reigrut, K001
|
||||||
* @author Kellan Elliott-McCrea <kellan@protest.net> -- author of MagpieRSS
|
* @author Kellan Elliott-McCrea <kellan@protest.net> -- author of MagpieRSS
|
||||||
* @author Jeroen De Dauw
|
* @author Jeroen De Dauw
|
||||||
|
@ -14,7 +14,7 @@
|
||||||
* @link http://www.mediawiki.org/wiki/Extension:RSS Documentation
|
* @link http://www.mediawiki.org/wiki/Extension:RSS Documentation
|
||||||
*/
|
*/
|
||||||
|
|
||||||
define( "EXTENSION_RSS_VERSION", "2.01 20120224" );
|
define( "EXTENSION_RSS_VERSION", "2.10 20120227" );
|
||||||
|
|
||||||
if ( !defined( 'MEDIAWIKI' ) ) {
|
if ( !defined( 'MEDIAWIKI' ) ) {
|
||||||
die( "This is not a valid entry point.\n" );
|
die( "This is not a valid entry point.\n" );
|
||||||
|
@ -52,7 +52,7 @@ $wgRSSCacheAge = 3600;
|
||||||
// for debugging set $wgRSSCacheCompare = 1;
|
// for debugging set $wgRSSCacheCompare = 1;
|
||||||
$wgRSSCacheCompare = false;
|
$wgRSSCacheCompare = false;
|
||||||
|
|
||||||
// 5 second timeout
|
// 15 second timeout
|
||||||
$wgRSSFetchTimeout = 15;
|
$wgRSSFetchTimeout = 15;
|
||||||
|
|
||||||
// Ignore the RSS tag in all but the namespaces listed here.
|
// Ignore the RSS tag in all but the namespaces listed here.
|
||||||
|
@ -77,6 +77,11 @@ $wgRSSUrlWhitelist = array();
|
||||||
// include "*" if you expressly want to allow all urls (you should not do this)
|
// include "*" if you expressly want to allow all urls (you should not do this)
|
||||||
// $wgRSSUrlWhitelist = array( "*" );
|
// $wgRSSUrlWhitelist = array( "*" );
|
||||||
|
|
||||||
|
// Maximum number of redirects to follow (defaults to 0)
|
||||||
|
// Note: this should only be used when the target URLs are trusted,
|
||||||
|
// to avoid attacks on intranet services accessible by HTTP.
|
||||||
|
$wgRSSUrlNumberOfAllowedRedirects = 0;
|
||||||
|
|
||||||
// Agent to use for fetching feeds
|
// Agent to use for fetching feeds
|
||||||
$wgRSSUserAgent = "MediaWikiRSS/" . strtok( EXTENSION_RSS_VERSION, " " ) . " (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension";
|
$wgRSSUserAgent = "MediaWikiRSS/" . strtok( EXTENSION_RSS_VERSION, " " ) . " (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension";
|
||||||
|
|
||||||
|
@ -89,4 +94,4 @@ $wgRSSDateDefaultFormat = "(Y-m-d H:i:s)";
|
||||||
// limit the number of characters in the item description
|
// limit the number of characters in the item description
|
||||||
// or set to false for unlimited length.
|
// or set to false for unlimited length.
|
||||||
// $wgRSSItemMaxLength = false;
|
// $wgRSSItemMaxLength = false;
|
||||||
// $wgRSSItemMaxLength = 100;
|
$wgRSSItemMaxLength = 200;
|
||||||
|
|
29
RSSData.php
29
RSSData.php
|
@ -16,24 +16,11 @@ class RSSData {
|
||||||
}
|
}
|
||||||
$xpath = new DOMXPath( $xml );
|
$xpath = new DOMXPath( $xml );
|
||||||
|
|
||||||
// register namespace as below, and apply a regex to the expression
|
// namespace-safe method to find all elements
|
||||||
// http://de3.php.net/manual/en/domxpath.query.php#103461
|
$items = $xpath->query( "//*[local-name() = 'item']" );
|
||||||
$namespaceURI = $xml->lookupnamespaceURI( NULL );
|
|
||||||
|
|
||||||
if ( ( null !== $namespaceURI ) ) {
|
if ( $items->length == 0 ) {
|
||||||
$defaultNS = "defaultNS";
|
$items = $xpath->query( "//*[local-name() = 'entry']" );
|
||||||
$xpath->registerNamespace( $defaultNS, $namespaceURI );
|
|
||||||
$defaultNS = "defaultNS:";
|
|
||||||
} else {
|
|
||||||
$defaultNS = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
// is it an RSS feed ?
|
|
||||||
$items = $xpath->query( $this->namespacePrefixedQuery( "/rss/channel/item", $defaultNS ) );
|
|
||||||
|
|
||||||
if ( $items->length === 0 ) {
|
|
||||||
// or is it an ATOM feed ?
|
|
||||||
$items = $xpath->query( $this->namespacePrefixedQuery( "/feed/entry", $defaultNS ) );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( $items->length !== 0 ) {
|
if( $items->length !== 0 ) {
|
||||||
|
@ -61,14 +48,6 @@ class RSSData {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function namespacePrefixedQuery( $query, $namespace = "" ) {
|
|
||||||
if ( $namespace !== "" ) {
|
|
||||||
$ret = preg_replace( '#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $namespace . '$2', $query );
|
|
||||||
} else {
|
|
||||||
$ret = $query;
|
|
||||||
}
|
|
||||||
return $ret;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Return a string that will be used to map RSS elements that
|
* Return a string that will be used to map RSS elements that
|
||||||
* contain similar data (e.g. dc:date, date, and pubDate) to the
|
* contain similar data (e.g. dc:date, date, and pubDate) to the
|
||||||
|
|
13
RSSHooks.php
13
RSSHooks.php
|
@ -21,19 +21,24 @@ class RSSHooks {
|
||||||
* @param $frame PPFrame parser context
|
* @param $frame PPFrame parser context
|
||||||
*/
|
*/
|
||||||
static function renderRss( $input, $args, $parser, $frame ) {
|
static function renderRss( $input, $args, $parser, $frame ) {
|
||||||
global $wgRSSCacheAge, $wgRSSCacheCompare, $wgRSSNamespaces, $wgRSSUrlWhitelist;
|
global $wgRSSCacheAge, $wgRSSCacheCompare, $wgRSSNamespaces,
|
||||||
|
$wgRSSUrlWhitelist,$wgRSSAllowedFeeds;
|
||||||
|
|
||||||
if ( is_array( $wgRSSNamespaces ) && count( $wgRSSNamespaces ) ) {
|
if ( is_array( $wgRSSNamespaces ) && count( $wgRSSNamespaces ) ) {
|
||||||
$ns = $parser->getTitle()->getNamespace();
|
$ns = $parser->getTitle()->getNamespace();
|
||||||
$checkNS = array_flip( $wgRSSNamespaces );
|
$checkNS = array_flip( $wgRSSNamespaces );
|
||||||
|
|
||||||
if( !isset( $checkNS[$ns] ) ) {
|
if( !isset( $checkNS[$ns] ) ) {
|
||||||
return wfMsg( 'rss-ns-permission' );
|
return RSSUtils::RSSError( 'rss-ns-permission' );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch ( true ) {
|
switch ( true ) {
|
||||||
|
|
||||||
|
case ( isset( $wgRSSAllowedFeeds ) ):
|
||||||
|
return RSSUtils::RSSError( 'rss-deprecated-wgrssallowedfeeds-found' );
|
||||||
|
break;
|
||||||
|
|
||||||
# disallow because there is no whitelist or empty whitelist
|
# disallow because there is no whitelist or empty whitelist
|
||||||
case ( !isset( $wgRSSUrlWhitelist )
|
case ( !isset( $wgRSSUrlWhitelist )
|
||||||
|| !is_array( $wgRSSUrlWhitelist )
|
|| !is_array( $wgRSSUrlWhitelist )
|
||||||
|
@ -59,7 +64,7 @@ class RSSHooks {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !Http::isValidURI( $input ) ) {
|
if ( !Http::isValidURI( $input ) ) {
|
||||||
return wfMsg( 'rss-invalid-url', htmlspecialchars( $input ) );
|
return RSSutils::RSSError( 'rss-invalid-url', htmlspecialchars( $input ) );
|
||||||
}
|
}
|
||||||
if ( $wgRSSCacheCompare ) {
|
if ( $wgRSSCacheCompare ) {
|
||||||
$timeout = $wgRSSCacheCompare;
|
$timeout = $wgRSSCacheCompare;
|
||||||
|
@ -79,7 +84,7 @@ class RSSHooks {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !is_object( $rss->rss ) || !is_array( $rss->rss->items ) ) {
|
if ( !is_object( $rss->rss ) || !is_array( $rss->rss->items ) ) {
|
||||||
return wfMsg( 'rss-empty', htmlspecialchars( $input ) );
|
return RSSUtils::RSSError( 'rss-empty', htmlspecialchars( $input ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
return $rss->renderFeed( $parser, $frame );
|
return $rss->renderFeed( $parser, $frame );
|
||||||
|
|
|
@ -218,7 +218,8 @@ class RSSParser {
|
||||||
* @return Status object
|
* @return Status object
|
||||||
*/
|
*/
|
||||||
protected function fetchRemote( $key, array $headers = array()) {
|
protected function fetchRemote( $key, array $headers = array()) {
|
||||||
global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy;
|
global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
|
||||||
|
$wgRSSUrlNumberOfAllowedRedirects;
|
||||||
|
|
||||||
if ( $this->etag ) {
|
if ( $this->etag ) {
|
||||||
wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
|
wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
|
||||||
|
@ -244,16 +245,54 @@ class RSSParser {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
$url = $this->url;
|
$url = $this->url;
|
||||||
$noProxy = false;
|
$noProxy = !isset( $wgRSSProxy );
|
||||||
|
|
||||||
// Example for disabling proxy use for certain urls
|
// Example for disabling proxy use for certain urls
|
||||||
// $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );
|
// $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copied from HttpFunctions.php
|
||||||
|
* Perform an HTTP request
|
||||||
|
*
|
||||||
|
* @param $method String: HTTP method. Usually GET/POST
|
||||||
|
* @param $url String: full URL to act on. If protocol-relative, will be expanded to an http:// URL
|
||||||
|
* @param $options Array: options to pass to MWHttpRequest object.
|
||||||
|
* Possible keys for the array:
|
||||||
|
* - timeout Timeout length in seconds
|
||||||
|
* - postData An array of key-value pairs or a url-encoded form data
|
||||||
|
* - proxy The proxy to use.
|
||||||
|
* Otherwise it will use $wgHTTPProxy (if set)
|
||||||
|
* Otherwise it will use the environment variable "http_proxy" (if set)
|
||||||
|
* - noProxy Don't use any proxy at all. Takes precedence over proxy value(s).
|
||||||
|
* - sslVerifyHost (curl only) Verify hostname against certificate
|
||||||
|
* - sslVerifyCert (curl only) Verify SSL certificate
|
||||||
|
* - caInfo (curl only) Provide CA information
|
||||||
|
* - maxRedirects Maximum number of redirects to follow (defaults to 5)
|
||||||
|
* - followRedirects Whether to follow redirects (defaults to false).
|
||||||
|
* Note: this should only be used when the target URL is trusted,
|
||||||
|
* to avoid attacks on intranet services accessible by HTTP.
|
||||||
|
* - userAgent A user agent, if you want to override the default
|
||||||
|
* MediaWiki/$wgVersion
|
||||||
|
* @return Mixed: (bool)false on failure or a string on success
|
||||||
|
*/
|
||||||
|
|
||||||
|
if ( isset( $wgRSSUrlNumberOfAllowedRedirects )
|
||||||
|
&& is_numeric( $wgRSSUrlNumberOfAllowedRedirects ) ) {
|
||||||
|
$maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
|
||||||
|
} else {
|
||||||
|
$maxRedirects = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we set followRedirects intentionally to true to see error messages
|
||||||
|
// in cases where the maximum number of redirects is reached
|
||||||
$client = HttpRequest::factory( $url,
|
$client = HttpRequest::factory( $url,
|
||||||
array(
|
array(
|
||||||
'timeout' => $wgRSSFetchTimeout,
|
'timeout' => $wgRSSFetchTimeout,
|
||||||
'proxy' => $wgRSSProxy,
|
'followRedirects' => true,
|
||||||
'noProxy' => $noProxy,
|
'maxRedirects' => $maxRedirects,
|
||||||
|
'proxy' => $wgRSSProxy,
|
||||||
|
'noProxy' => $noProxy,
|
||||||
|
'userAgent' => $wgRSSUserAgent,
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -506,8 +545,8 @@ class RSSParser {
|
||||||
*
|
*
|
||||||
* @param $text String: the text to examine
|
* @param $text String: the text to examine
|
||||||
* @param $filterType String: "filterOut" to check for matches in the
|
* @param $filterType String: "filterOut" to check for matches in the
|
||||||
* filterOut member list.
|
* filterOut member list.
|
||||||
* Otherwise, uses the filter member list.
|
* Otherwise, uses the filter member list.
|
||||||
* @return Boolean: decision to filter or not.
|
* @return Boolean: decision to filter or not.
|
||||||
*/
|
*/
|
||||||
protected function filter( $text, $filterType ) {
|
protected function filter( $text, $filterType ) {
|
||||||
|
@ -591,7 +630,7 @@ class RSSUtils {
|
||||||
* @param String|Array $param Error parameter (or parameters)
|
* @param String|Array $param Error parameter (or parameters)
|
||||||
* @return String Html that is the error.
|
* @return String Html that is the error.
|
||||||
*/
|
*/
|
||||||
public static function RSSError( $errorMessageName, $param ) {
|
public static function RSSError( $errorMessageName, $param = false ) {
|
||||||
|
|
||||||
// Anything from a parser tag should use Content lang for message,
|
// Anything from a parser tag should use Content lang for message,
|
||||||
// since the cache doesn't vary by user language: do not use wfMsg but wfMsgForContent
|
// since the cache doesn't vary by user language: do not use wfMsg but wfMsgForContent
|
||||||
|
|
Loading…
Reference in a new issue