* Use [[Template:RSSPost]] (and [[Mediawiki:Rss-item]]), or another
  specified template, to display the feeds and remove hard-coded HTML.
* Add check to make sure the cache dir is writable and put an error in
  the debug log if not.
* Remove Snoopy.class.php — replace with use of the Http class.
* Remove RSSParse.php (which was a copy of MagpieRSS) and use plain
  XML parsing instead.  MagpieRSS looks unmaintained and has security
  problems.  http://www.madirish.net/?article=253
* Remove references to Magpie RSS
* Remove (over?) use of wf{Suppress,Restore}Warnings
* Construct simple RSSData class for normalizing (some of) the feed
  data.
This commit is contained in:
Mark A. Hershberger 2010-10-19 21:54:53 +00:00
parent d06c97258b
commit e14f1ce5f2
7 changed files with 232 additions and 1975 deletions

View file

@ -15,8 +15,14 @@ $messages['en'] = array(
'rss-desc' => 'Displays an RSS feed on a wiki page',
'rss-error' => 'Failed to load RSS feed from $1: $2',
'rss-empty' => 'Failed to load RSS feed from $1!',
'rss-item' => '{{RSSPost | title = {{{title}}} | link = {{{link}}} | date = {{{date}}} | author = {{{author}}} }}',
);
$messages['qqq'] = array(
'rss-item' => 'Do not translate',
);
/** Afrikaans (Afrikaans)
* @author Naudefj
*/

246
RSS.php
View file

@ -42,7 +42,7 @@ $wgExtensionCredits['parserhook'][] = array(
// Internationalization file and autoloadable classes
$dir = dirname( __FILE__ ) . '/';
$wgExtensionMessagesFiles['RSS'] = $dir . 'RSS.i18n.php';
$wgAutoloadClasses['MagpieRSS'] = $dir . 'RSSParse.php';
$wgAutoloadClasses['RSSData'] = $dir . 'RSSData.php';
$wgAutoloadClasses['RSSCache'] = $dir . 'RSSCache.php';
$wgHooks['ParserFirstCallInit'][] = 'wfRssExtension';
@ -55,51 +55,32 @@ function wfRssExtension( &$parser ) {
}
# Parser hook callback function
function renderRss( $input ) {
global $wgOutputEncoding, $wgParser;
function renderRss( $input, $args, $parser, $frame ) {
global $wgOutputEncoding;
// Kill parser cache
$wgParser->disableCache();
$parser->disableCache();
if ( !$input ) {
return ''; # if <rss>-section is empty, return nothing
}
# Parse fields in rss section
$fields = explode( '|', $input );
wfSuppressWarnings();
$url = $fields[0];
wfRestoreWarnings();
$args = array();
for ( $i = 1; $i < sizeof( $fields ); $i++ ) {
$f = $fields[$i];
if ( strpos( $f, '=' ) === false ) {
$args[strtolower( trim( $f ) )] = false;
} else {
list( $k, $v ) = explode( '=', $f, 2 );
if ( trim( $v ) == false ) {
$args[strtolower( trim( $k ) )] = false;
} else {
$args[strtolower( trim( $k ) )] = trim( $v );
}
}
}
$url = $input;
# Get charset from argument array
wfSuppressWarnings();
$charset = $args['charset'];
wfRestoreWarnings();
if( !$charset ) {
if( isset( $args['charset'] ) ) {
$charset = $args['charset'];
} else {
$charset = $wgOutputEncoding;
}
# Get max number of headlines from argument-array
wfSuppressWarnings();
$maxheads = $args['max'];
wfRestoreWarnings();
$headcnt = 0;
if( isset( $args['max'] ) ) {
$maxheads = $args['max'];
} else {
$maxheads = 32;
}
# Get short flag from argument array
# If short is set, no description text is printed
@ -111,52 +92,57 @@ function renderRss( $input ) {
# Get reverse flag from argument array
if( isset( $args['reverse'] ) ) {
$reverse = true;
} else {
$reverse = false;
$rss->items = array_reverse( $rss->items );
}
# Get date format from argument array
if ( isset( $args['date'] ) ) {
wfSuppressWarnings();
$date = $args['date'];
wfRestoreWarnings();
if ( $date == '' ) {
$date = 'd M Y H:i';
}
} else {
$date = false;
$date = 'd M Y H:i';
}
# Get highlight terms from argument array
wfSuppressWarnings();
$rssHighlight = $args['highlight'];
wfRestoreWarnings();
$rssHighlight = str_replace( ' ', ' ', $rssHighlight );
$rssHighlight = explode( ' ', trim( $rssHighlight ) );
if( isset( $args['highlight'] ) ) {
$rssHighlight = $args['highlight'];
$rssHighlight = str_replace( ' ', ' ', $rssHighlight );
$rssHighlight = explode( ' ', trim( $rssHighlight ) );
} else {
$rssHighlight = false;
}
# Get filter terms from argument array
wfSuppressWarnings();
$rssFilter = $args['filter'];
wfRestoreWarnings();
$rssFilter = str_replace( ' ', ' ', $rssFilter );
$rssFilter = explode( ' ', trim( $rssFilter ) );
if( isset( $args['filter'] ) ) {
$rssFilter = $args['filter'];
$rssFilter = str_replace( ' ', ' ', $rssFilter );
$rssFilter = explode( ' ', trim( $rssFilter ) );
} else {
$rssFilter = false;
}
# Filterout terms
wfSuppressWarnings();
$rssFilterout = $args['filterout'];
wfRestoreWarnings();
$rssFilterout = str_replace( ' ', ' ', $rssFilterout );
$rssFilterout = explode( ' ', trim( $rssFilterout ) );
if( isset( $args['filterout'] ) ) {
$rssFilterout = $args['filterout'];
$rssFilterout = str_replace( ' ', ' ', $rssFilterout );
$rssFilterout = explode( ' ', trim( $rssFilterout ) );
} else {
$rssFilterout = false;
}
if( isset( $args['template'] ) ) {
$template = 'Template:'.$args['template'];
} else {
$template = wfMsgNoTrans("rss-item");
}
$headcnt = 0;
# Fetch RSS. May be cached locally.
# Refer to the documentation of MagpieRSS for details.
if ( !function_exists( 'fetch_rss' ) ) {
include( dirname( __FILE__ ) . '/RSSFetch.php' ); // provides fetch_rss() function
}
wfSuppressWarnings();
$rss = fetch_rss( $url );
wfRestoreWarnings();
# Check for errors.
if ( empty( $rss ) ) {
@ -171,110 +157,50 @@ function renderRss( $input ) {
return '<div>' . wfMsg( 'rss-empty', $url ) . '</div>';
}
# Build title line
#$title = iconv( $charset, $wgOutputEncoding, $rss->channel['title'] );
#if( $rss->channel['link'] ) $title = "<a href='" . $rss->channel['link'] . "'>$title</a>";
$output = '';
if( $reverse ) {
$rss->items = array_reverse( $rss->items );
}
$description = false;
foreach ( $rss->items as $item ) {
if ( isset( $item['description'] ) && $item['description'] ) {
$description = true;
break;
/* This would be better served by preg_replace_callback, but
* I can't create a callback that carries $item in PHP < 5.3
*/
if ( $template ) {
$headcnt = 0;
foreach( $rss->items as $item ) {
if($maxheads > 0 && $headcnt >= $maxheads) continue;
$decision = true;
foreach(array('title', 'author', 'description', 'category') as $check) {
if( isset( $item[$check] ) ) {
$decision &= wfRssFilter($item[$check], $rssFilter) & wfRssFilterout($item[$check], $rssFilterout);
if( !$decision ) continue 2;
$item[$check] = wfRssHighlight( $item[$check], $rssHighlight );
}
}
$rssTemp = "";
foreach(explode("|", $template) as $bit) {
$bits = explode("=", $bit);
if( count($bits) == 2 ) {
$left = trim($bits[0]);
if( isset( $item[$left] ) ) {
$right = $item[$left];
}
$rssTemp .= implode( " = ", array($left, $right) );
} else {
$rssTemp .= $bit;
}
$rssTemp .= "|";
}
$rssTemp .= "}}";
$output .= $parser->recursiveTagParse( $rssTemp, $frame );
$headcnt++;
}
}
# Build items
if ( !$short && $description ) { # full item list
$output .= '<dl>';
foreach ( $rss->items as $item ) {
$d_text = true;
$d_title = true;
$href = htmlspecialchars( trim( iconv( $charset, $wgOutputEncoding, $item['link'] ) ) );
$title = htmlspecialchars( trim( iconv( $charset, $wgOutputEncoding, $item['title'] ) ) );
if ( $date ) {
$pubdate = trim( iconv( $charset, $wgOutputEncoding, $item['pubdate'] ) );
$pubdate = date( $date, strtotime( $pubdate ) );
}
$d_title = wfRssFilter( $title, $rssFilter );
$d_title = wfRssFilterout( $title, $rssFilterout );
$title = wfRssHighlight( $title, $rssHighlight );
# Build description text if desired
if ( $item['description'] ) {
$text = trim( iconv( $charset, $wgOutputEncoding, $item['description'] ) );
# Avoid <pre> tags
$text = str_replace( "\r", ' ', $text );
$text = str_replace( "\n", ' ', $text );
$text = str_replace( "\t", ' ', $text );
$text = str_replace( '<br>', '', $text );
$d_text = wfRssFilter( $text, $rssFilter );
$d_text = wfRssFilterout( $text, $rssFilterout );
$text = wfRssHighlight( $text, $rssHighlight );
$display = $d_text || $d_title;
} else {
$text = '';
$display = $d_title;
}
if ( $display ) {
$output.= "<dt><a href='$href'><b>$title</b></a></dt>";
if ( $date ) {
$output .= " ($pubdate)";
}
if ( $text ) {
$output .= "<dd>$text <b>[<a href='$href'>?</a>]</b></dd>";
}
}
# Cut off output when maxheads is reached:
if ( ++$headcnt == $maxheads ) {
break;
}
}
$output .= '</dl>';
} else { # short item list
## HACKY HACKY HACKY
$output .= '<ul>';
$displayed = array();
foreach ( $rss->items as $item ) {
$href = htmlspecialchars( trim( iconv( $charset, $wgOutputEncoding, $item['link'] ) ) );
$title = htmlspecialchars( trim( iconv( $charset, $wgOutputEncoding, $item['title'] ) ) );
$d_title = wfRssFilter( $title, $rssFilter ) && wfRssFilterout( $title, $rssFilterout );
$title = wfRssHighlight( $title, $rssHighlight );
if ( $date ) {
$pubdate = isset( $item['pubdate'] ) ? trim( iconv( $charset, $wgOutputEncoding, $item['pubdate'] ) ) : '';
if ( $pubdate == '' ) {
$pubdate = trim( iconv( $charset, $wgOutputEncoding, $item['dc']['date'] ) );
}
$pubdate = date( $date, strtotime( $pubdate ) );
}
if ( $d_title && !in_array( $title, $displayed ) ) {
// Add date to ouput if specified
$output .= '<li><a href="' . $href . '" title="' . $title . '">' . $title . '</a>';
if( $date ) {
$output .= " ($pubdate)";
}
$output .= '</li>';
$displayed[] = $title;
# Cut off output when maxheads is reached:
if ( ++$headcnt == $maxheads ) {
break;
}
}
}
$output.= '</ul>';
}
return $output;
}
@ -343,4 +269,4 @@ function wfRssHighlight( $text, $rssHighlight ) {
}
return $text;
}
}

View file

@ -30,6 +30,14 @@ class RSSCache {
);
}
}
// check if it is writable.
if ( !is_writable( $this->BASE_CACHE ) ) {
wfDebugLog(
'RSS',
"Cache dir '" . $this->BASE_CACHE . "' is not writable."
);
}
}
/**

61
RSSData.php Normal file
View file

@ -0,0 +1,61 @@
<?php
/**
 * Minimal, normalized view of an RSS 2.0 document.
 *
 * Built from an HTTP response object; exposes the feed's items as a list of
 * associative arrays (field name => text value) plus the validators
 * (ETag / Last-Modified) sent with the response.
 */
class RSSData {
	/** Value of the response's ETag header, as returned by the response object. */
	public $etag;

	/** Value of the response's Last-Modified header, as returned by the response object. */
	public $last_modified;

	/** Human-readable parse error, or null when the document was parsed successfully. */
	public $ERROR;

	/** DOMDocument holding the parsed feed (left empty when parsing failed). */
	public $xml;

	/** List of items; always an array so callers can iterate without checking for null. */
	public $items = array();

	/**
	 * Parse the body of an HTTP response as RSS.
	 *
	 * @param $resp Object: response providing getContent() and
	 *   getResponseHeader( $name ); those are the only two methods used here.
	 */
	public function __construct( $resp ) {
		$this->xml = new DOMDocument;
		$this->etag = $resp->getResponseHeader( 'ETag' );
		$this->last_modified = $resp->getResponseHeader( 'Last-Modified' );

		$content = $resp->getContent();
		if ( !is_string( $content ) || trim( $content ) === '' ) {
			$this->ERROR = 'Empty response body';
			return;
		}

		// Collect libxml errors instead of letting them surface as PHP
		// warnings, and restore the previous setting afterwards so the
		// caller's error handling is not affected.
		$previous = libxml_use_internal_errors( true );
		// LIBXML_NONET: the feed is untrusted remote input, so the parser
		// must not fetch anything over the network while loading it.
		$loaded = $this->xml->loadXML( $content, LIBXML_NONET );
		$problems = libxml_get_errors();
		libxml_clear_errors();
		libxml_use_internal_errors( $previous );

		if ( !$loaded ) {
			$this->ERROR = 'Invalid XML';
			if ( count( $problems ) > 0 ) {
				$this->ERROR .= ': ' . trim( $problems[0]->message );
			}
			return;
		}

		$xpath = new DOMXPath( $this->xml );
		$items = $xpath->query( '/rss/channel/item' );
		foreach ( $items as $item ) {
			$bit = array();
			foreach ( $item->childNodes as $n ) {
				// Only child elements are feed fields; whitespace text nodes
				// and comments between them would otherwise be stored under
				// names such as "#text".
				if ( $n->nodeType !== XML_ELEMENT_NODE ) {
					continue;
				}
				$name = $this->rssTokenToName( $n->nodeName );
				if ( $name !== null ) {
					$bit[$name] = $n->nodeValue;
				}
			}
			$this->items[] = $bit;
		}
	}

	/**
	 * Map an element name found inside <item> to the key used in $items.
	 *
	 * @param $n String: qualified element name (e.g. "pubDate", "dc:creator")
	 * @return String key to store the value under, or null when the element
	 *   should be dropped. Unknown names are returned unchanged.
	 */
	public function rssTokenToName( $n ) {
		switch ( $n ) {
			// Both date flavours end up under "date"; the values are stored
			// verbatim (dc:date looks like "2010-10-18T18:07:00Z", pubDate is
			// an RFC-style date) and are not parsed here.
			case 'dc:date':
			case 'pubDate':
				return 'date';
			case 'dc:creator':
				return 'author';
			case 'title':
				return 'title';
			case 'content:encoded':
				return 'encodedContent';
			// Fields that are deliberately not exposed.
			case 'slash:comments':
			case 'slash:department':
			case 'slash:section':
			case 'slash:hit_parade':
			case 'feedburner:origLink':
			case 'wfw:commentRss':
			case 'comments':
			case 'category':
				return null;
			default:
				return $n;
		}
	}
}

View file

@ -10,26 +10,26 @@
* Globals - redefine these in your script to change the
* behaviour of fetch_rss() currently, most options effect the cache
*
* $wgMagpieRSSCache - Should Magpie cache parsed RSS objects?
* $wgRSSCache - Should we cache parsed RSS objects?
*
* $wgMagpieRSSCacheDirectory - Where should Magpie cache parsed RSS objects?
* $wgRSSCacheDirectory - Where should we cache parsed RSS objects?
* This should be a location that the webserver can write to. If this
* directory does not already exist, Magpie will try to be smart and create it.
* directory does not already exist, We will try to be smart and create it.
* This will often fail for permissions reasons.
*
* $wgMagpieRSSCacheAge - How long to store cached RSS objects (in seconds)?.
* $wgRSSCacheAge - How long to store cached RSS objects (in seconds)?.
*
* $wgMagpieRSSCacheFreshOnly - If remote fetch fails, throw an error
* $wgRSSCacheFreshOnly - If remote fetch fails, throw an error
* instead of returning stale object?
*/
$MAGPIE_ERROR = '';
$RSS_FETCH_ERROR = '';
/**
* Return RSS object for the given URL, maintaining caching.
*
* NOTES ON CACHING:
* If caching is on ($wgMagpieRSSCache) fetch_rss will first check the cache.
* If caching is on ($wgRSSCache) fetch_rss will first check the cache.
*
* NOTES ON RETRIEVING REMOTE FILES:
* If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
@ -37,29 +37,32 @@ $MAGPIE_ERROR = '';
*
* NOTES ON FAILED REQUESTS:
* If there is an HTTP error while fetching an RSS object, the cached version
* will be returned, if it exists (and if $wgMagpieRSSCacheFreshOnly is off)
* will be returned, if it exists (and if $wgRSSCacheFreshOnly is off)
*
* @param $url String: URL of RSS file
* @return parsed RSS object (see RSSParse)
*/
function fetch_rss( $url ) {
global $wgMagpieRSSCache, $wgMagpieRSSCacheAge, $wgMagpieRSSCacheFreshOnly;
global $wgMagpieRSSCacheDirectory, $wgMagpieRSSFetchTimeout;
global $wgMagpieRSSOutputEncoding, $wgMagpieRSSInputEncoding;
global $wgMagpieRSSDetectEncoding, $wgMagpieRSSUseGzip;
global $wgRSSCache, $wgRSSCacheAge, $wgRSSCacheFreshOnly;
global $wgRSSCacheDirectory, $wgRSSFetchTimeout;
global $wgRSSOutputEncoding, $wgRSSInputEncoding;
global $wgRSSDetectEncoding, $wgRSSUseGzip;
$wgMagpieRSSCache = true;
$wgMagpieRSSCacheAge = 60 * 60; // one hour
$wgMagpieRSSCacheFreshOnly = false;
$wgMagpieRSSCacheDirectory = '/extensions/RSS/cache';
$wgMagpieRSSOutputEncoding = 'ISO-8859-1';
$wgMagpieRSSInputEncoding = null;
$wgMagpieRSSDetectEncoding = true;
$nameValue = array('wgRSSCache' => true,
'wgRSSCacheAge' => 60 * 60, // one hour
'wgRSSCacheFreshOnly' => false,
'wgRSSCacheDirectory' => '/extensions/RSS/cache',
'wgRSSOutputEncoding' => 'ISO-8859-1',
'wgRSSInputEncoding' => null,
'wgRSSDetectEncoding' => true,
'wgRSSFetchTimeout' => 5, // 5 second timeout
'wgRSSUseGzip' => true);
$wgMagpieRSSFetchTimeout = 5; // 5 second timeout
// use gzip encoding to fetch RSS files if supported?
$wgMagpieRSSUseGzip = true;
foreach($nameValue as $n => $v) {
if( !isset( $GLOBALS[$n] ) ) {
$GLOBALS[$n] = $v;
}
}
if ( !isset( $url ) ) {
wfDebugLog( 'RSS', 'fetch_rss (RSSFetch.php) called without a URL!' );
@ -67,10 +70,11 @@ function fetch_rss( $url ) {
}
// if cache is disabled
if ( !$wgMagpieRSSCache ) {
if ( !$wgRSSCache ) {
// fetch file, and parse it
$resp = _fetch_remote_file( $url );
if ( $resp->status >= 200 && $resp->status < 300 ) {
$errs = $resp->getErrorsArray();
if ( count( $errs ) == 0 ) {
return _response_to_rss( $resp );
} else {
wfDebugLog( 'RSS', "Failed to fetch $url and cache is off" );
@ -82,12 +86,12 @@ function fetch_rss( $url ) {
// 2. if there is a hit, make sure its fresh
// 3. if cached obj fails freshness check, fetch remote
// 4. if remote fails, return stale object, or error
$cache = new RSSCache( $wgMagpieRSSCacheDirectory, $wgMagpieRSSCacheAge );
$cache = new RSSCache( $wgRSSCacheDirectory, $wgRSSCacheAge );
if ( $cache->ERROR ) {
wfDebugLog(
'RSS',
'MagpieRSS: cache error on RSSFetch.php! Error msg: ' .
'cache error on RSSFetch.php! Error msg: ' .
$cache->ERROR
);
}
@ -99,7 +103,7 @@ function fetch_rss( $url ) {
// store parsed XML by desired output encoding
// as character munging happens at parse time
$cache_key = $url . $wgMagpieRSSOutputEncoding;
$cache_key = $url . $wgRSSOutputEncoding;
if ( !$cache->ERROR ) {
// return cache HIT, MISS, or STALE
@ -112,7 +116,7 @@ function fetch_rss( $url ) {
if ( isset( $rss ) && $rss ) {
// should be cache age
$rss->from_cache = 1;
wfDebugLog( 'RSS', 'MagpieRSS: Cache HIT' );
wfDebugLog( 'RSS', 'Cache HIT' );
return $rss;
}
}
@ -128,16 +132,17 @@ function fetch_rss( $url ) {
}
}
var_dump($request_headers);
$resp = _fetch_remote_file( $url, $request_headers );
if ( isset( $resp ) && $resp ) {
if ( $resp->status == '304' ) {
if ( $resp->getStatus() === 304 ) {
// we have the most current copy
wfDebugLog( 'RSS', "Got 304 for $url" );
// reset cache on 304 (at minutillo insistent prodding)
$cache->set( $cache_key, $rss );
return $rss;
} elseif ( $resp->status >= 200 && $resp->status < 300 ) {
} elseif ( $resp->getStatus() >= 200 && $resp->getStatus() < 300 ) {
$rss = _response_to_rss( $resp );
if ( $rss ) {
wfDebugLog( 'RSS', 'Fetch successful' );
@ -147,12 +152,10 @@ function fetch_rss( $url ) {
}
} else {
$errormsg = "Failed to fetch $url ";
if ( $resp->status == '-100' ) {
global $wgMagpieRSSFetchTimeout;
$errormsg .= '(Request timed out after ' . $wgMagpieRSSFetchTimeout . ' seconds)';
if ( $resp->getStatus() === -100 ) {
global $wgRSSFetchTimeout;
$errormsg .= '(Request timed out after ' . $wgRSSFetchTimeout . ' seconds)';
} elseif ( $resp->error ) {
// compensate for Snoopy's annoying habbit to tacking
// on '\n'
$http_error = substr( $resp->error, 0, -2 );
$errormsg .= "(HTTP Error: $http_error)";
} else {
@ -172,38 +175,47 @@ function fetch_rss( $url ) {
}
// else we totally failed
$MAGPIE_ERROR = $errormsg;
$RSS_FETCH_ERROR = $errormsg;
wfDebugLog(
'MagpieRSS (RSSFetch): we totally failed :-( Error message:' .
'RSSFetch: we totally failed :-( Error message:' .
$errormsg
);
return false;
} // end if ( !$wgMagpieRSSCache ) {
} // end if ( !$wgRSSCache ) {
} // end fetch_rss()
/**
* Retrieve an arbitrary remote file.
* @param $url String: URL of the remote file
* @param $headers Array: headers to send along with the request
* @return an HTTP response object (see Snoopy.class.php)
* @return an HTTP response object
*/
function _fetch_remote_file( $url, $headers = '' ) {
global $wgMagpieRSSFetchTimeout, $wgMagpieRSSUseGzip;
// Snoopy is an HTTP client in PHP
if ( !class_exists( 'Snoopy', false ) ) {
require_once( dirname( __FILE__ ) . '/Snoopy.class.php' );
}
$client = new Snoopy();
$client->agent = 'MagpieRSS/0.72 (+http://magpierss.sourceforge.net) / MediaWiki RSS extension';
$client->read_timeout = $wgMagpieRSSFetchTimeout;
$client->use_gzip = $wgMagpieRSSUseGzip;
if ( is_array( $headers ) ) {
$client->rawheaders = $headers;
global $wgRSSFetchTimeout, $wgRSSUseGzip;
$client =
HttpRequest::factory($url, array('timeout' => $wgRSSFetchTimeout));
$client->setUserAgent('MediawikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension');
/* $client->use_gzip = $wgRSSUseGzip; */
if ( is_array( $headers ) && count( $headers ) > 0 ) {
foreach($headers as $h) {
if( count( $h ) > 1 ) {
$client->setHeader($h[0], $h[1]);
} else {
var_dump($h);
}
}
}
@$client->fetch( $url );
return $client;
$fetch = $client->execute();
/* @$client->fetch( $url ); */
if( $fetch->isGood() ) {
return $client;
} else {
wfDebugLog( 'RSS', 'error fetching $url: ' . $fetch->getMessage() );
}
}
/**
@ -212,45 +224,24 @@ function _fetch_remote_file( $url, $headers = '' ) {
* @return parsed RSS object (see RSSParse) or false
*/
function _response_to_rss( $resp ) {
global $wgMagpieRSSOutputEncoding, $wgMagpieRSSInputEncoding, $wgMagpieRSSDetectEncoding;
$rss = new MagpieRSS(
$resp->results,
$wgMagpieRSSOutputEncoding,
$wgMagpieRSSInputEncoding,
$wgMagpieRSSDetectEncoding
);
global $wgRSSOutputEncoding, $wgRSSInputEncoding, $wgRSSDetectEncoding;
$rss = new RSSData($resp);
// if RSS parsed successfully
if ( $rss && !$rss->ERROR ) {
// find Etag and Last-Modified
foreach( $resp->headers as $h ) {
// 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
if ( strpos( $h, ': ' ) ) {
list( $field, $val ) = explode( ': ', $h, 2 );
} else {
$field = $h;
$val = '';
}
if ( $field == 'ETag' ) {
$rss->etag = $val;
}
if ( $field == 'Last-Modified' ) {
$rss->last_modified = $val;
}
}
return $rss;
} else { // else construct error message
$errormsg = 'Failed to parse RSS file.';
$errormsg = 'Failed to parsex RSS file.';
if ( $rss ) {
$errormsg .= ' (' . $rss->ERROR . ')';
}
$MAGPIE_ERROR = $errormsg;
$RSS_FETCH_ERROR = $errormsg;
wfDebugLog( 'RSS', 'error!' . $errormsg );
return false;
} // end if ( $rss && !$rss->ERROR )
}
}

View file

@ -1,494 +0,0 @@
<?php
/**
* Hybrid parser, and object, takes RSS or Atom feed as a string and returns a
* simple object.
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3
*
* @file
* @see RSSFetch.php for a simpler interface with integrated caching support
*/
/**
 * Event-driven RSS/Atom parser that doubles as the parsed-feed object.
 *
 * Construct it with the feed source; afterwards $items, $channel, $textinput
 * and $image hold the collected fields and $ERROR / $WARNING describe any
 * problem encountered.
 */
class MagpieRSS {
	public $parser;

	public $current_item = array(); // item currently being parsed
	public $items = array(); // collection of parsed items
	public $channel = array(); // hash of channel fields
	public $textinput = array();
	public $image = array();
	public $feed_type;
	public $feed_version;
	public $encoding = ''; // output encoding of parsed rss

	public $_source_encoding = ''; // only set if we have to parse xml prolog

	public $ERROR = '';
	public $WARNING = '';

	// define some constants
	public $_CONTENT_CONSTRUCTS = array( 'content', 'summary', 'info', 'title', 'tagline', 'copyright' );
	public $_KNOWN_ENCODINGS = array( 'UTF-8', 'US-ASCII', 'ISO-8859-1' );

	// parser variables, useless if you're not a parser, treat as private
	public $stack = array(); // parser stack
	public $inchannel = false;
	public $initem = false;
	public $incontent = false; // if in Atom <content mode="xml"> field
	public $intextinput = false;
	public $inimage = false;
	public $current_namespace = false;

	/**
	 * Set up XML parser, parse source, and return populated RSS object.
	 *
	 * On a setup failure (XML extension missing, parser could not be
	 * created) $ERROR is set and the object is left empty.
	 *
	 * @param $source String: string containing the RSS to be parsed
	 *
	 * NOTE: Probably a good idea to leave the encoding options alone unless
	 *       you know what you're doing as PHP's character set support is
	 *       a little weird.
	 *
	 * @param $output_encoding String: output the parsed RSS in this character
	 *                                 set; defaults to ISO-8859-1.
	 *
	 * @param $input_encoding String: the character set of the incoming RSS source.
	 *                                Leave blank and the parser will try to
	 *                                figure it out.
	 *
	 * @param $detect_encoding Boolean: if false (and $input_encoding is given),
	 *                                  the parser is created for that encoding
	 *                                  instead of auto-detecting. (caveat emptor)
	 */
	function __construct( $source, $output_encoding = 'ISO-8859-1',
		$input_encoding = null, $detect_encoding = true )
	{
		# if PHP xml isn't compiled in, record the error and stop: calling
		# xml_parser_create() below would be a fatal error.
		if ( !function_exists( 'xml_parser_create' ) ) {
			$this->error(
				"Failed to load PHP's XML Extension. " .
				'http://www.php.net/manual/en/ref.xml.php',
				E_USER_ERROR
			);
			return;
		}

		list( $parser, $source ) = $this->create_parser(
			$source,
			$output_encoding,
			$input_encoding,
			$detect_encoding
		);

		# Older PHP versions hand back a resource, newer ones an XMLParser
		# object; anything else means creation failed.
		if ( !is_resource( $parser ) && !is_object( $parser ) ) {
			$this->error(
				"Failed to create an instance of PHP's XML parser. " .
				'http://www.php.net/manual/en/ref.xml.php',
				E_USER_ERROR
			);
			return;
		}

		$this->parser = $parser;

		# Register the handlers as explicit object callables.
		xml_set_element_handler(
			$this->parser,
			array( $this, 'feed_start_element' ),
			array( $this, 'feed_end_element' )
		);
		xml_set_character_data_handler( $this->parser, array( $this, 'feed_cdata' ) );

		# The whole document is passed in one chunk, so mark it as final to
		# have truncated documents reported as errors.
		$status = xml_parse( $this->parser, (string)$source, true );

		if ( !$status ) {
			$errorcode = xml_get_error_code( $this->parser );
			if ( $errorcode != XML_ERROR_NONE ) {
				$xml_error = xml_error_string( $errorcode );
				$error_line = xml_get_current_line_number( $this->parser );
				$error_col = xml_get_current_column_number( $this->parser );
				$errormsg = "$xml_error at line $error_line, column $error_col";

				$this->error( $errormsg );
			}
		}

		# Only resource-based parsers need an explicit free.
		if ( is_resource( $this->parser ) ) {
			xml_parser_free( $this->parser );
		}
		# The handle is useless once parsing is done; dropping it also breaks
		# the parser <-> object reference cycle created by the callbacks.
		$this->parser = null;

		$this->normalize();
	}

	/**
	 * Start-tag handler: tracks parser state and records Atom links and
	 * inlined Atom content markup.
	 *
	 * @param $p Mixed: XML parser handle (unused)
	 * @param $element String: element name as reported by the parser
	 * @param $attrs Array: attribute name => value
	 */
	function feed_start_element( $p, $element, $attrs ) {
		$el = $element = strtolower( $element );
		$attrs = array_change_key_case( $attrs, CASE_LOWER );

		// check for a namespace, and split if found
		$ns = false;
		if ( strpos( $element, ':' ) ) {
			list( $ns, $el ) = explode( ':', $element, 2 );
		}
		if ( $ns && $ns != 'rdf' ) {
			$this->current_namespace = $ns;
		}

		// if feed type isn't set, then this is first element of feed
		// identify feed from root element
		if ( !isset( $this->feed_type ) ) {
			$version = isset( $attrs['version'] ) ? $attrs['version'] : '';
			if ( $el == 'rdf' ) {
				$this->feed_type = 'RSS';
				$this->feed_version = '1.0';
			} elseif ( $el == 'rss' ) {
				$this->feed_type = 'RSS';
				$this->feed_version = $version;
			} elseif ( $el == 'feed' ) {
				$this->feed_type = 'Atom';
				$this->feed_version = $version;
				$this->inchannel = true;
			}
			return;
		}

		if ( $el == 'channel' ) {
			$this->inchannel = true;
		} elseif ( $el == 'item' || $el == 'entry' ) {
			$this->initem = true;
			if ( isset( $attrs['rdf:about'] ) ) {
				$this->current_item['about'] = $attrs['rdf:about'];
			}
		}

		// if we're in the default namespace of an RSS feed,
		// record textinput or image fields
		elseif (
			$this->feed_type == 'RSS' &&
			$this->current_namespace == '' &&
			$el == 'textinput' )
		{
			$this->intextinput = true;
		} elseif (
			$this->feed_type == 'RSS' &&
			$this->current_namespace == '' &&
			$el == 'image' )
		{
			$this->inimage = true;
		}

		// handle Atom content constructs
		elseif ( $this->feed_type == 'Atom' && in_array( $el, $this->_CONTENT_CONSTRUCTS ) ) {
			// avoid clashing w/ RSS mod_content
			if ( $el == 'content' ) {
				$el = 'atom_content';
			}

			$this->incontent = $el;
		}

		// if inside an Atom content construct (e.g. content or summary) field treat tags as text
		elseif ( $this->feed_type == 'Atom' && $this->incontent ) {
			// if tags are inlined, then flatten
			$attrs_str = join(
				' ',
				array_map(
					array( 'MagpieRSS', 'mapAttributes' ),
					array_keys( $attrs ),
					array_values( $attrs )
				)
			);

			$this->append_content( "<$element $attrs_str>" );

			array_unshift( $this->stack, $el );
		}

		// Atom supports many links per containing element.
		// Link elements of type rel='alternate' are treated
		// as being equivalent to RSS's simple link element.
		elseif ( $this->feed_type == 'Atom' && $el == 'link' ) {
			$rel = isset( $attrs['rel'] ) ? $attrs['rel'] : '';
			$href = isset( $attrs['href'] ) ? $attrs['href'] : '';
			if ( $rel == 'alternate' ) {
				$link_el = 'link';
			} else {
				$link_el = 'link_' . $rel;
			}

			$this->append( $link_el, $href );
		} else { // set stack[0] to current element
			array_unshift( $this->stack, $el );
		}
	}

	/**
	 * Character-data handler: appends text to the field named by the current
	 * element stack, or to the open Atom content construct.
	 */
	function feed_cdata( $p, $text ) {
		if ( $this->feed_type == 'Atom' && $this->incontent ) {
			$this->append_content( $text );
		} else {
			$current_el = join( '_', array_reverse( $this->stack ) );
			$this->append( $current_el, $text );
		}
	}

	/**
	 * End-tag handler: closes items, resets state flags and pops the stack.
	 */
	function feed_end_element( $p, $el ) {
		$el = strtolower( $el );

		if ( $el == 'item' || $el == 'entry' ) {
			$this->items[] = $this->current_item;
			$this->current_item = array();
			$this->initem = false;
		} elseif ( $this->feed_type == 'RSS' && $this->current_namespace == '' && $el == 'textinput' ) {
			$this->intextinput = false;
		} elseif ( $this->feed_type == 'RSS' && $this->current_namespace == '' && $el == 'image' ) {
			$this->inimage = false;
		} elseif ( $this->feed_type == 'Atom' && in_array( $el, $this->_CONTENT_CONSTRUCTS ) ) {
			$this->incontent = false;
		} elseif ( $el == 'channel' || $el == 'feed' ) {
			$this->inchannel = false;
		} elseif ( $this->feed_type == 'Atom' && $this->incontent ) {
			// balance tags properly
			// note: I don't think this is actually necessary
			if ( isset( $this->stack[0] ) && $this->stack[0] == $el ) {
				$this->append_content( "</$el>" );
			} else {
				$this->append_content( "<$el />" );
			}

			array_shift( $this->stack );
		} else {
			array_shift( $this->stack );
		}

		$this->current_namespace = false;
	}

	/**
	 * Append $str2 to $str1, initialising $str1 when it does not exist yet.
	 */
	function concat( &$str1, $str2 = '' ) {
		if ( !isset( $str1 ) ) {
			$str1 = '';
		}

		$str1 .= $str2;
	}

	/**
	 * Append text to the currently open Atom content construct of the item
	 * or, outside an item, of the channel.
	 */
	function append_content( $text ) {
		if ( $this->initem ) {
			$this->concat( $this->current_item[$this->incontent], $text );
		} elseif ( $this->inchannel ) {
			$this->concat( $this->channel[$this->incontent], $text );
		}
	}

	// smart append - field and namespace aware
	function append( $el, $text ) {
		if ( !$el ) {
			return;
		}
		if ( $this->current_namespace ) {
			if ( $this->initem ) {
				$this->concat(
					$this->current_item[$this->current_namespace][$el], $text
				);
			} elseif ( $this->inchannel ) {
				$this->concat(
					$this->channel[$this->current_namespace][$el], $text
				);
			} elseif ( $this->intextinput ) {
				$this->concat(
					$this->textinput[$this->current_namespace][$el], $text
				);
			} elseif ( $this->inimage ) {
				$this->concat(
					$this->image[$this->current_namespace][$el], $text
				);
			}
		} else {
			if ( $this->initem ) {
				$this->concat(
					$this->current_item[$el], $text
				);
			} elseif ( $this->intextinput ) {
				$this->concat(
					$this->textinput[$el], $text
				);
			} elseif ( $this->inimage ) {
				$this->concat(
					$this->image[$el], $text
				);
			} elseif ( $this->inchannel ) {
				$this->concat(
					$this->channel[$el], $text
				);
			}
		}
	}

	/**
	 * Mirror Atom fields into their RSS counterparts (and vice versa) and add
	 * a 'date_timestamp' epoch to items whose date could be parsed.
	 */
	function normalize() {
		// if atom populate rss fields
		if ( $this->is_atom() ) {
			if ( isset( $this->channel['tagline'] ) ) {
				$this->channel['description'] = $this->channel['tagline'];
			}
			for ( $i = 0; $i < count( $this->items ); $i++ ) {
				$item = $this->items[$i];
				if ( isset( $item['summary'] ) ) {
					$item['description'] = $item['summary'];
				}
				if ( isset( $item['atom_content'] ) ) {
					$item['content']['encoded'] = $item['atom_content'];
				}

				// prefer 'issued', fall back to 'modified'; either may be absent
				$atom_date = null;
				if ( isset( $item['issued'] ) ) {
					$atom_date = $item['issued'];
				} elseif ( isset( $item['modified'] ) ) {
					$atom_date = $item['modified'];
				}
				if ( $atom_date ) {
					$epoch = self::parse_w3cdtf( $atom_date );
					if ( $epoch && $epoch > 0 ) {
						$item['date_timestamp'] = $epoch;
					}
				}

				$this->items[$i] = $item;
			}
		} elseif ( $this->is_rss() ) {
			if ( isset( $this->channel['description'] ) ) {
				$this->channel['tagline'] = $this->channel['description'];
			}
			for ( $i = 0; $i < count( $this->items ); $i++ ) {
				$item = $this->items[$i];
				if ( isset( $item['description'] ) ) {
					$item['summary'] = $item['description'];
				}
				if ( isset( $item['content']['encoded'] ) ) {
					$item['atom_content'] = $item['content']['encoded'];
				}

				if ( $this->is_rss() == '1.0' && isset( $item['dc']['date'] ) ) {
					$epoch = self::parse_w3cdtf( $item['dc']['date'] );
					if ( $epoch && $epoch > 0 ) {
						$item['date_timestamp'] = $epoch;
					}
				} elseif ( isset( $item['pubdate'] ) ) {
					$epoch = @strtotime( $item['pubdate'] );
					if ( $epoch > 0 ) {
						$item['date_timestamp'] = $epoch;
					}
				}

				$this->items[$i] = $item;
			}
		}
	}

	/**
	 * @return String feed version when the feed is RSS, false otherwise
	 */
	function is_rss() {
		if ( $this->feed_type == 'RSS' ) {
			return $this->feed_version;
		} else {
			return false;
		}
	}

	/**
	 * @return String feed version when the feed is Atom, false otherwise
	 */
	function is_atom() {
		if ( $this->feed_type == 'Atom' ) {
			return $this->feed_version;
		} else {
			return false;
		}
	}

	/**
	 * Instantiate an XML parser.
	 * @return Array: the XML parser and the (unchanged) source
	 */
	function create_parser( $source, $out_enc, $in_enc, $detect ) {
		// by default PHP5 does a fine job of detecting input encodings
		if ( !$detect && $in_enc ) {
			$parser = xml_parser_create( $in_enc );
		} else {
			$parser = xml_parser_create( '' );
		}
		if ( $out_enc ) {
			$this->encoding = $out_enc;
			xml_parser_set_option(
				$parser,
				XML_OPTION_TARGET_ENCODING,
				$out_enc
			);
		}

		return array( $parser, $source );
	}

	/**
	 * Checks if $enc is an encoding type supported by MagpieRSS.
	 * @param $enc String: encoding name
	 * @return String or false
	 */
	function known_encoding( $enc ) {
		$enc = strtoupper( $enc );
		if ( in_array( $enc, $this->_KNOWN_ENCODINGS ) ) {
			return $enc;
		} else {
			return false;
		}
	}

	/**
	 * Record a problem: notice-level messages go to $WARNING, everything
	 * else (including the default E_USER_WARNING) to $ERROR.
	 */
	function error( $errormsg, $lvl = E_USER_WARNING ) {
		// append PHP's error message if track_errors is enabled
		if ( isset( $php_errormsg ) ) {
			$errormsg .= " ($php_errormsg)";
		}

		$notices = E_USER_NOTICE|E_NOTICE;
		if ( $lvl&$notices ) {
			$this->WARNING = $errormsg;
		} else {
			$this->ERROR = $errormsg;
		}
	}

	/**
	 * Parse a W3CDTF date into unix epoch.
	 * This used to be in its own file.
	 * @note http://www.w3.org/TR/NOTE-datetime
	 * @param $date_str String: date string to parse
	 * @return Integer: epoch seconds, or -1 when the string is not W3CDTF
	 */
	public static function parse_w3cdtf( $date_str ) {
		// Groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 seconds,
		// 7 offset sign, 8 offset hours, 9 offset minutes, 10 literal "Z".
		// The seconds wrapper is non-capturing so that group 6 holds the
		// digits only (not ":SS") and "Z" really is group 10.
		$pat = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';

		if ( !preg_match( $pat, (string)$date_str, $match ) ) {
			return -1;
		}

		// trailing groups that did not take part in the match are absent
		$seconds = ( isset( $match[6] ) && $match[6] !== '' ) ? (int)$match[6] : 0;

		// calculate epoch for current date assuming GMT
		$epoch = gmmktime(
			(int)$match[4], (int)$match[5], $seconds,
			(int)$match[2], (int)$match[3], (int)$match[1]
		);

		$offset = 0;
		$is_zulu = isset( $match[10] ) && $match[10] === 'Z';
		if ( !$is_zulu && isset( $match[7] ) && $match[7] !== '' ) {
			$offset = ( ( (int)$match[8] * 60 ) + (int)$match[9] ) * 60;
			// is timezone ahead of GMT? then subtract offset
			if ( $match[7] === '+' ) {
				$offset = -$offset;
			}
		}

		return $epoch + $offset;
	}

	/**
	 * Serialise one attribute for inlined Atom content markup.
	 * The value is escaped so that it cannot break out of the quotes.
	 *
	 * @return String of the form name="value"
	 */
	public static function mapAttributes( $k, $v ) {
		$v = str_replace(
			array( '&', '"', '<', '>' ),
			array( '&amp;', '&quot;', '&lt;', '&gt;' ),
			$v
		);
		return "$k=\"$v\"";
	}
} // end class MagpieRSS

File diff suppressed because it is too large Load diff