diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 96bfed5..b57d3d6 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -11,7 +11,12 @@ http://www.mediawiki.org/wiki/Extension:RSS (otherwise using the defaults - PHP will abort the entire program when your memory usage gets too high) * bug 30028 "Error parsing XML for RSS" - improve and harden Extension:RSS when - parsing differently flavoured RSS feeds + parsing differently flavoured RSS feeds and ATOM feeds + +=== Version 2.00 2012-02-24 === +* first version which can parse RSS and at least some ATOM feeds + partial solution of bug 30028 "Error parsing XML for RSS" - improve and harden + Extension:RSS when parsing differently flavoured RSS feeds and ATOM feeds === Version 1.94 2012-02-23 === * changed white list definition and behaviour: diff --git a/RSS.php b/RSS.php index c9f8c79..a9c80b9 100644 --- a/RSS.php +++ b/RSS.php @@ -4,7 +4,7 @@ * * @file * @ingroup Extensions - * @version 1.94 + * @version 2.00 * @author mutante, Daniel Kinzler, Rdb, Mafs, Thomas Gries, Alxndr, Chris Reigrut, K001 * @author Kellan Elliott-McCrea -- author of MagpieRSS * @author Jeroen De Dauw @@ -14,7 +14,7 @@ * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation */ -define( "EXTENSION_RSS_VERSION", "1.94 20120223" ); +define( "EXTENSION_RSS_VERSION", "2.00 20120224" ); if ( !defined( 'MEDIAWIKI' ) ) { die( "This is not a valid entry point.\n" ); @@ -49,6 +49,7 @@ $wgRSSCacheAge = 3600; // Check cached content, if available, against remote. // $wgRSSCacheCompare should be set to false or a timeout // (less than $wgRSSCacheAge) after which a comparison will be made. +// for debugging set $wgRSSCacheCompare = 1; $wgRSSCacheCompare = false; // 5 second timeout diff --git a/RSSData.php b/RSSData.php index c8f4a9f..4906d4c 100644 --- a/RSSData.php +++ b/RSSData.php @@ -15,7 +15,28 @@ class RSSData { return; } $xpath = new DOMXPath( $xml ); - $items = $xpath->query( '/rss/channel/item' ); + + // register namespace as below, and apply a regex to the expression + // http://de3.php.net/manual/en/domxpath.query.php#103461 + $namespaceURI = $xml->lookupnamespaceURI( NULL ); + + if ( ( null !== $namespaceURI ) ) { + $defaultNS = "defaultNS"; + $xpath->registerNamespace( $defaultNS, $namespaceURI ); + $defaultNS = "defaultNS:"; + } else { + $defaultNS = ""; + } + + $q = "/rss/channel/item"; + $q = preg_replace( '#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $defaultNS . '$2', $q ); + $items = $xpath->query( $q ); // is it an RSS feed ? + + if ( $items->length === 0 ) { + $q = "/feed/entry"; + $q = preg_replace( '#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $defaultNS . '$2', $q ); + $items = $xpath->query( $q ); // is it an ATOM feed ? + } if( $items->length !== 0 ) { foreach ( $items as $item ) { @@ -37,7 +58,7 @@ class RSSData { $this->items[] = $bit; } } else { - $this->error = 'No RSS items found.'; + $this->error = 'No RSS//ATOM items found.'; return; } } @@ -52,10 +73,11 @@ class RSSData { * @param $n String: name of the element we have * @return String Name to map it to */ - protected function rssTokenToName( $n ) { - switch( $n ) { + protected function rssTokenToName( $name ) { + switch( $name ) { case 'dc:date': case 'pubDate': + case 'updated': return 'date'; case 'dc:creator': return 'author'; @@ -73,9 +95,8 @@ class RSSData { case 'comments': case 'category': return null; - default: - return $n; + return $name; } } } \ No newline at end of file diff --git a/RSSParser.php b/RSSParser.php index 3c16b14..05c5d68 100644 --- a/RSSParser.php +++ b/RSSParser.php @@ -328,24 +328,32 @@ class RSSParser { foreach ( array_keys( $item ) as $info ) { switch ( $info ) { + // ATOM elements and RSS elements are item link urls + case 'id': + $txt = $this->sanitizeUrl( $item['id'] ); + $renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem ); + break; case 'link': - $txt = $this->sanitizeUrl( $item[ $info ] ); + if ( !isset( $item['id'] ) ) { + $txt = $this->sanitizeUrl( $item['link'] ); + } + $renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem ); break; case 'date': $tempTimezone = date_default_timezone_get(); date_default_timezone_set( 'UTC' ); - $txt = date( $this->date, strtotime( $this->escapeTemplateParameter( $item[ $info ] ) ) ); + $txt = date( $this->date, strtotime( $this->escapeTemplateParameter( $item['date'] ) ) ); date_default_timezone_set( $tempTimezone ); + $renderedItem = str_replace( '{{{date}}}', $txt, $renderedItem ); break; default: - $str = $this->escapeTemplateParameter( $item[ $info ] ); + $str = $this->escapeTemplateParameter( $item[$info] ); if ( mb_strlen( $str ) > $this->ItemMaxLength ) { $str = mb_substr( $str, 0, $this->ItemMaxLength ) . " ..."; } $txt = $this->highlightTerms( $str ); + $renderedItem = str_replace( '{{{' . $info . '}}}', $txt, $renderedItem ); } - - $renderedItem = str_replace( '{{{' . $info . '}}}', $txt, $renderedItem ); } // nullify all remaining info items in the template