v2.00 can parse at least some ATOM feeds. This is a major improvement over pre-2.00 versions, which could only read and parse RSS feeds but not ATOM feeds. Version 2.00 begins to take care of namespaces in the XML. The parser still leaves room for further improvements. At the very least, E:RSS can now read both the RSS _and_ ATOM feeds generated by E:WikiArticleFeeds.

This commit is contained in:
Thomas Gries 2012-02-24 10:16:34 +00:00
parent e7e7b70a73
commit b276af44d9
Notes: Thomas Gries 2012-02-24 10:16:34 +00:00
4 changed files with 49 additions and 14 deletions

View file

@ -11,7 +11,12 @@ http://www.mediawiki.org/wiki/Extension:RSS
(otherwise using the defaults - PHP will abort the entire program when your
memory usage gets too high)
* bug 30028 "Error parsing XML for RSS" - improve and harden Extension:RSS when
parsing differently flavoured RSS feeds
parsing differently flavoured RSS feeds and ATOM feeds
=== Version 2.00 2012-02-24 ===
* first version which can parse RSS and at least some ATOM feeds;
a partial solution of bug 30028 "Error parsing XML for RSS" - improve and harden
Extension:RSS when parsing differently flavoured RSS feeds and ATOM feeds
=== Version 1.94 2012-02-23 ===
* changed white list definition and behaviour:

View file

@ -4,7 +4,7 @@
*
* @file
* @ingroup Extensions
* @version 1.94
* @version 2.00
* @author mutante, Daniel Kinzler, Rdb, Mafs, Thomas Gries, Alxndr, Chris Reigrut, K001
* @author Kellan Elliott-McCrea <kellan@protest.net> -- author of MagpieRSS
* @author Jeroen De Dauw
@ -14,7 +14,7 @@
* @link http://www.mediawiki.org/wiki/Extension:RSS Documentation
*/
define( "EXTENSION_RSS_VERSION", "1.94 20120223" );
define( "EXTENSION_RSS_VERSION", "2.00 20120224" );
if ( !defined( 'MEDIAWIKI' ) ) {
die( "This is not a valid entry point.\n" );
@ -49,6 +49,7 @@ $wgRSSCacheAge = 3600;
// Check cached content, if available, against remote.
// $wgRSSCacheCompare should be set to false or a timeout
// (less than $wgRSSCacheAge) after which a comparison will be made.
// for debugging set $wgRSSCacheCompare = 1;
$wgRSSCacheCompare = false;
// 5 second timeout

View file

@ -15,7 +15,28 @@ class RSSData {
return;
}
$xpath = new DOMXPath( $xml );
$items = $xpath->query( '/rss/channel/item' );
// register namespace as below, and apply a regex to the expression
// http://de3.php.net/manual/en/domxpath.query.php#103461
$namespaceURI = $xml->lookupnamespaceURI( NULL );
if ( ( null !== $namespaceURI ) ) {
$defaultNS = "defaultNS";
$xpath->registerNamespace( $defaultNS, $namespaceURI );
$defaultNS = "defaultNS:";
} else {
$defaultNS = "";
}
$q = "/rss/channel/item";
$q = preg_replace( '#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $defaultNS . '$2', $q );
$items = $xpath->query( $q ); // is it an RSS feed ?
if ( $items->length === 0 ) {
$q = "/feed/entry";
$q = preg_replace( '#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $defaultNS . '$2', $q );
$items = $xpath->query( $q ); // is it an ATOM feed ?
}
if( $items->length !== 0 ) {
foreach ( $items as $item ) {
@ -37,7 +58,7 @@ class RSSData {
$this->items[] = $bit;
}
} else {
$this->error = 'No RSS items found.';
$this->error = 'No RSS//ATOM items found.';
return;
}
}
@ -52,10 +73,11 @@ class RSSData {
* @param $name String: name of the element we have
* @return String Name to map it to
*/
protected function rssTokenToName( $n ) {
switch( $n ) {
protected function rssTokenToName( $name ) {
switch( $name ) {
case 'dc:date':
case 'pubDate':
case 'updated':
return 'date';
case 'dc:creator':
return 'author';
@ -73,9 +95,8 @@ class RSSData {
case 'comments':
case 'category':
return null;
default:
return $n;
return $name;
}
}
}

View file

@ -328,24 +328,32 @@ class RSSParser {
foreach ( array_keys( $item ) as $info ) {
switch ( $info ) {
// ATOM <id> elements and RSS <link> elements are item link urls
case 'id':
$txt = $this->sanitizeUrl( $item['id'] );
$renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
break;
case 'link':
$txt = $this->sanitizeUrl( $item[ $info ] );
if ( !isset( $item['id'] ) ) {
$txt = $this->sanitizeUrl( $item['link'] );
}
$renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
break;
case 'date':
$tempTimezone = date_default_timezone_get();
date_default_timezone_set( 'UTC' );
$txt = date( $this->date, strtotime( $this->escapeTemplateParameter( $item[ $info ] ) ) );
$txt = date( $this->date, strtotime( $this->escapeTemplateParameter( $item['date'] ) ) );
date_default_timezone_set( $tempTimezone );
$renderedItem = str_replace( '{{{date}}}', $txt, $renderedItem );
break;
default:
$str = $this->escapeTemplateParameter( $item[ $info ] );
$str = $this->escapeTemplateParameter( $item[$info] );
if ( mb_strlen( $str ) > $this->ItemMaxLength ) {
$str = mb_substr( $str, 0, $this->ItemMaxLength ) . " ...";
}
$txt = $this->highlightTerms( $str );
$renderedItem = str_replace( '{{{' . $info . '}}}', $txt, $renderedItem );
}
$renderedItem = str_replace( '{{{' . $info . '}}}', $txt, $renderedItem );
}
// nullify all remaining info items in the template