From b276af44d9ca2092b02c399140c796a670ec24c3 Mon Sep 17 00:00:00 2001 From: Thomas Gries Date: Fri, 24 Feb 2012 10:16:34 +0000 Subject: [PATCH] v2.00 can parse ATOM feeds, at least some. This is a major improvement over pre-2.00 versions, which could only read and parse RSS feeds but not ATOM feeds. Version 2.00 begins to take care of namespaces in the XML. The parser still leaves room for further improvements. At least, E:RSS can now read E:WikiArticleFeeds generated RSS _and_ ATOM feeds. --- RELEASE-NOTES | 7 ++++++- RSS.php | 5 +++-- RSSData.php | 33 +++++++++++++++++++++++++++------ RSSParser.php | 18 +++++++++++++----- 4 files changed, 49 insertions(+), 14 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 96bfed5..b57d3d6 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -11,7 +11,12 @@ http://www.mediawiki.org/wiki/Extension:RSS (otherwise using the defaults - PHP will abort the entire program when your memory usage gets too high) * bug 30028 "Error parsing XML for RSS" - improve and harden Extension:RSS when - parsing differently flavoured RSS feeds + parsing differently flavoured RSS feeds and ATOM feeds + +=== Version 2.00 2012-02-24 === +* first version which can parse RSS and at least some ATOM feeds + partial solution of bug 30028 "Error parsing XML for RSS" - improve and harden + Extension:RSS when parsing differently flavoured RSS feeds and ATOM feeds === Version 1.94 2012-02-23 === * changed white list definition and behaviour: diff --git a/RSS.php b/RSS.php index c9f8c79..a9c80b9 100644 --- a/RSS.php +++ b/RSS.php @@ -4,7 +4,7 @@ * * @file * @ingroup Extensions - * @version 1.94 + * @version 2.00 * @author mutante, Daniel Kinzler, Rdb, Mafs, Thomas Gries, Alxndr, Chris Reigrut, K001 * @author Kellan Elliott-McCrea -- author of MagpieRSS * @author Jeroen De Dauw @@ -14,7 +14,7 @@ * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation */ -define( "EXTENSION_RSS_VERSION", "1.94 20120223" ); +define( "EXTENSION_RSS_VERSION", "2.00 
20120224" ); if ( !defined( 'MEDIAWIKI' ) ) { die( "This is not a valid entry point.\n" ); @@ -49,6 +49,7 @@ $wgRSSCacheAge = 3600; // Check cached content, if available, against remote. // $wgRSSCacheCompare should be set to false or a timeout // (less than $wgRSSCacheAge) after which a comparison will be made. +// for debugging set $wgRSSCacheCompare = 1; $wgRSSCacheCompare = false; // 5 second timeout diff --git a/RSSData.php b/RSSData.php index c8f4a9f..4906d4c 100644 --- a/RSSData.php +++ b/RSSData.php @@ -15,7 +15,28 @@ class RSSData { return; } $xpath = new DOMXPath( $xml ); - $items = $xpath->query( '/rss/channel/item' ); + + // register namespace as below, and apply a regex to the expression + // http://de3.php.net/manual/en/domxpath.query.php#103461 + $namespaceURI = $xml->lookupnamespaceURI( NULL ); + + if ( ( null !== $namespaceURI ) ) { + $defaultNS = "defaultNS"; + $xpath->registerNamespace( $defaultNS, $namespaceURI ); + $defaultNS = "defaultNS:"; + } else { + $defaultNS = ""; + } + + $q = "/rss/channel/item"; + $q = preg_replace( '#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $defaultNS . '$2', $q ); + $items = $xpath->query( $q ); // is it an RSS feed ? + + if ( $items->length === 0 ) { + $q = "/feed/entry"; + $q = preg_replace( '#(::|/\s*|\A)(?![/@].+?|[a-z\-]+::)#', '$1' . $defaultNS . '$2', $q ); + $items = $xpath->query( $q ); // is it an ATOM feed ? 
+ } if( $items->length !== 0 ) { foreach ( $items as $item ) { @@ -37,7 +58,7 @@ class RSSData { $this->items[] = $bit; } } else { - $this->error = 'No RSS items found.'; + $this->error = 'No RSS//ATOM items found.'; return; } } @@ -52,10 +73,11 @@ class RSSData { * @param $n String: name of the element we have * @return String Name to map it to */ - protected function rssTokenToName( $n ) { - switch( $n ) { + protected function rssTokenToName( $name ) { + switch( $name ) { case 'dc:date': case 'pubDate': + case 'updated': return 'date'; case 'dc:creator': return 'author'; @@ -73,9 +95,8 @@ class RSSData { case 'comments': case 'category': return null; - default: - return $n; + return $name; } } } \ No newline at end of file diff --git a/RSSParser.php b/RSSParser.php index 3c16b14..05c5d68 100644 --- a/RSSParser.php +++ b/RSSParser.php @@ -328,24 +328,32 @@ class RSSParser { foreach ( array_keys( $item ) as $info ) { switch ( $info ) { + // ATOM elements and RSS elements are item link urls + case 'id': + $txt = $this->sanitizeUrl( $item['id'] ); + $renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem ); + break; case 'link': - $txt = $this->sanitizeUrl( $item[ $info ] ); + if ( !isset( $item['id'] ) ) { + $txt = $this->sanitizeUrl( $item['link'] ); + } + $renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem ); break; case 'date': $tempTimezone = date_default_timezone_get(); date_default_timezone_set( 'UTC' ); - $txt = date( $this->date, strtotime( $this->escapeTemplateParameter( $item[ $info ] ) ) ); + $txt = date( $this->date, strtotime( $this->escapeTemplateParameter( $item['date'] ) ) ); date_default_timezone_set( $tempTimezone ); + $renderedItem = str_replace( '{{{date}}}', $txt, $renderedItem ); break; default: - $str = $this->escapeTemplateParameter( $item[ $info ] ); + $str = $this->escapeTemplateParameter( $item[$info] ); if ( mb_strlen( $str ) > $this->ItemMaxLength ) { $str = mb_substr( $str, 0, $this->ItemMaxLength ) . 
" ..."; } $txt = $this->highlightTerms( $str ); + $renderedItem = str_replace( '{{{' . $info . '}}}', $txt, $renderedItem ); } - - $renderedItem = str_replace( '{{{' . $info . '}}}', $txt, $renderedItem ); } // nullify all remaining info items in the template