mediawiki-extensions-RSS/RSS.php

411 lines
10 KiB
PHP
Raw Normal View History

<?php
/**
 * RSS-Feed MediaWiki extension
 *
 * @file
 * @ingroup Extensions
 * @version 1.8
 * @author mutante, Daniel Kinzler, Rdb, Mafs, Alxndr, Chris Reigrut, K001
 * @author Kellan Elliott-McCrea <kellan@protest.net> -- author of MagpieRSS
 * @author Jeroen De Dauw
 * @author Jack Phoenix <jack@countervandalism.net>
 * @copyright © Kellan Elliott-McCrea <kellan@protest.net>
 * @copyright © mutante, Daniel Kinzler, Rdb, Mafs, Alxndr, Chris Reigrut, K001
 * @link http://www.mediawiki.org/wiki/Extension:RSS Documentation
 */
2010-07-23 14:14:05 +00:00
2010-10-20 18:28:47 +00:00
if ( !defined( 'MEDIAWIKI' ) ) {
	// This file only makes sense when loaded with the MEDIAWIKI constant
	// already defined; otherwise stop immediately with a short notice.
	exit( "This is not a valid entry point.\n" );
}
2010-07-23 14:14:05 +00:00
// Extension credits that will show up on Special:Version
$wgExtensionCredits['parserhook'][] = array(
	'name' => 'RSS feed',
	'author' => array(
		'Kellan Elliott-McCrea',
		'mutante',
		'Daniel Kinzler',
		'Rdb',
		'Mafs',
		'Alxndr',
		'Wikinaut',
		'Chris Reigrut',
		'K001',
		'Jack Phoenix',
		'Jeroen De Dauw',
		'Mark A. Hershberger'
	),
	'version' => '1.8',
	'url' => 'http://www.mediawiki.org/wiki/Extension:RSS',
	// Key of the i18n message holding the extension description
	'descriptionmsg' => 'rss-desc',
);
2010-07-23 14:14:05 +00:00
// Internationalization file and autoloadable classes, both located next to
// this file.
$dir = dirname( __FILE__ ) . '/';
$wgExtensionMessagesFiles['RSS'] = "{$dir}RSS.i18n.php";
$wgAutoloadClasses['RSSData'] = "{$dir}RSSData.php";

// RSS::parserInit() installs the <rss> tag hook on the parser.
$wgHooks['ParserFirstCallInit'][] = 'RSS::parserInit';

// Expiry handed to the cache when a parsed feed is stored (one hour).
$wgRSSCacheAge = 60 * 60;

// 'timeout' option handed to the HTTP request that fetches a feed (seconds).
$wgRSSFetchTimeout = 5;

// The settings below are defined here but are not read by any code in this
// file. NOTE(review): confirm whether anything else consumes them.
$wgRSSCacheFreshOnly = false;
$wgRSSOutputEncoding = 'ISO-8859-1';
$wgRSSInputEncoding = null;
$wgRSSDetectEncoding = true;
$wgRSSUseGzip = true;
/**
 * Implements the <rss> parser tag: fetches a feed over HTTP (with a cache in
 * front of it), filters its items and renders each one through an item
 * template.
 */
class RSS {
	/** Output character set: the `charset` tag attribute, else $wgOutputEncoding. */
	protected $charset;
	/** Maximum number of items to render; zero or less disables the limit. */
	protected $maxheads = 32;
	/** Whether the item order is reversed before rendering. */
	protected $reversed = false;
	/** Terms wrapped in coloured spans by highlightTerms(). */
	protected $highlight = array();
	/** When non-empty, only items whose description contains one of these terms are shown. */
	protected $filter = array();
	/** Items whose description contains one of these terms are hidden. */
	protected $filterOut = array();
	/** Wikitext template used by renderItem() for every item. */
	protected $itemTemplate;
	/** URL of the feed (the content of the <rss> tag). */
	protected $url;
	/** ETag header value of the stored feed. */
	protected $etag;
	/** Last-Modified header value of the stored feed. */
	protected $last_modified;
	/** DOMDocument built from the most recent HTTP response. */
	protected $xml;
	/** Error text checked by renderRss(); never assigned within this class. */
	protected $ERROR;
	/** HTTP request object used for the most recent remote fetch. */
	public $client;
	/**
	 * Parsed feed (an RSSData instance) whose `items` member is rendered.
	 * Declared explicitly (it used to be created dynamically) and kept public
	 * so that existing outside readers keep working.
	 */
	public $rss;
	/** Value of the `date` tag attribute. FIXME: not used yet. */
	public $date;

	/**
	 * ParserFirstCallInit hook handler: installs the <rss> tag hook.
	 *
	 * @param $parser Parser object offering setHook()
	 * @return Boolean: always true
	 */
	public static function parserInit( $parser ) {
		$parser->setHook( 'rss', array( __CLASS__, 'renderRss' ) );
		return true;
	}

	/**
	 * Parser hook callback for <rss>.
	 *
	 * @param $input String: content of the tag (the feed URL)
	 * @param $args Array: attributes of the tag
	 * @param $parser Parser
	 * @param $frame PPFrame
	 * @return String: rendered feed, an error message, or '' for an empty tag
	 */
	public static function renderRss( $input, $args, $parser, $frame ) {
		if ( !$input ) {
			return ''; # if <rss>-section is empty, return nothing
		}
		$parser->disableCache();

		$rss = new RSS( $input, $args );
		$status = $rss->fetch();

		# fetch() returns false, true, or the Status object of a remote fetch,
		# so a failed Status has to be detected explicitly.
		$failed = $status === false
			|| ( $status instanceof Status && !$status->isGood() );

		if ( $failed || !isset( $rss->rss->items ) || !is_array( $rss->rss->items ) ) {
			# The tag content is user supplied and the value returned by a
			# tag hook ends up in the page HTML, so escape it before it is
			# substituted into the message.
			return wfMsg( 'rss-empty', htmlspecialchars( $input ) );
		}

		if ( isset( $rss->ERROR ) ) {
			return wfMsg( 'rss-error', $rss->ERROR );
		}

		return $rss->renderFeed( $parser, $frame );
	}

	/**
	 * Split a string on runs of spaces.
	 *
	 * @param $str String
	 * @return Array: the non-empty pieces (empty array when there are none)
	 */
	public static function explodeOnSpaces( $str ) {
		# PREG_SPLIT_NO_EMPTY keeps leading/trailing spaces (or an empty
		# attribute) from producing empty "terms".
		$found = preg_split( '# +#', $str, -1, PREG_SPLIT_NO_EMPTY );
		return is_array( $found ) ? $found : array();
	}

	/**
	 * @param $url String: URL of the feed
	 * @param $args Array: tag attributes; recognised keys are charset, max,
	 *   reverse, date, highlight, filter, filterout and template
	 */
	public function __construct( $url, $args ) {
		if ( isset( $url ) ) {
			$this->url = $url;
		}

		# Get charset from argument array, falling back to the wiki default.
		# (The fallback used to be written back into $args and was lost.)
		if ( isset( $args['charset'] ) ) {
			$this->charset = $args['charset'];
		} else {
			global $wgOutputEncoding;
			$this->charset = $wgOutputEncoding;
		}

		# Get max number of headlines from argument-array
		if ( isset( $args['max'] ) ) {
			$this->maxheads = (int)$args['max'];
		}

		# Get reverse flag from argument array
		if ( isset( $args['reverse'] ) ) {
			$this->reversed = true;
		}

		# Get date format from argument array
		# FIXME: not used yet
		if ( isset( $args['date'] ) ) {
			$this->date = $args['date'];
		}

		# Get highlight terms from argument array
		if ( isset( $args['highlight'] ) ) {
			$this->highlight = self::explodeOnSpaces( $args['highlight'] );
		}

		# Get filter terms from argument array
		if ( isset( $args['filter'] ) ) {
			$this->filter = self::explodeOnSpaces( $args['filter'] );
		}

		if ( isset( $args['filterout'] ) ) {
			$this->filterOut = self::explodeOnSpaces( $args['filterout'] );
		}

		# Item template: a page in the Template namespace when requested and
		# the name is a valid title, otherwise the 'rss-item' message.
		$titleObject = isset( $args['template'] )
			? Title::newFromText( $args['template'], NS_TEMPLATE )
			: null;
		if ( $titleObject ) {
			$article = new Article( $titleObject, 0 );
			$this->itemTemplate = $article->fetchContent( 0 );
		} else {
			$this->itemTemplate = wfMsgNoTrans( 'rss-item' );
		}
	}

	/**
	 * Make the feed for $this->url available in $this->rss.
	 *
	 * The cache is consulted first; only when it holds no usable entry is
	 * the feed fetched remotely.
	 *
	 * @return Mixed: false when no URL is set, true on a cache hit, otherwise
	 *   the Status object returned by fetchRemote()
	 */
	public function fetch() {
		if ( !isset( $this->url ) ) {
			wfDebugLog( 'RSS', 'fetch called without a URL!' );
			return false;
		}

		$key = wfMemcKey( $this->url );

		if ( $this->loadFromCache( $key ) ) {
			wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );
			return true;
		}
		wfDebugLog( 'RSS', 'Cache Failed ' . $this->url );

		return $this->fetchRemote( $key );
	}

	/**
	 * Load etag, last-modified value and parsed feed from the cache.
	 *
	 * @param $key String: cache key
	 * @return Boolean: true when a usable entry was found and loaded
	 */
	public function loadFromCache( $key ) {
		global $parserMemc;

		$data = $parserMemc->get( $key );
		if ( $data === false ) {
			return false;
		}

		# The entry is the serialized triple written by storeInCache();
		# anything else is treated as a miss.
		$cached = unserialize( $data );
		if ( !is_array( $cached ) || count( $cached ) < 3 ) {
			return false;
		}
		list( $etag, $lastModified, $rss ) = array_values( $cached );

		if ( !isset( $rss->items ) ) {
			return false;
		}

		# Now that we've verified that we got useful data, keep it around.
		$this->rss = $rss;
		$this->etag = $etag;
		$this->last_modified = $lastModified;

		return true;
	}

	/**
	 * Store etag, last-modified value and parsed feed in the cache, using
	 * $wgRSSCacheAge as expiry.
	 *
	 * @param $key String: cache key
	 * @return Mixed: result of the cache write, or false when no feed is loaded
	 */
	public function storeInCache( $key ) {
		global $parserMemc, $wgRSSCacheAge;

		if ( !isset( $this->rss ) ) {
			return false;
		}

		$payload = serialize( array( $this->etag, $this->last_modified, $this->rss ) );
		return $parserMemc->set( $key, $payload, $wgRSSCacheAge );
	}

	/**
	 * Retrieve the feed over HTTP and parse it.
	 *
	 * @param $key String: cache key under which a parsed feed is stored
	 * @param $headers Array: extra request headers, either as name => value
	 *   or as a list of array( name, value ) pairs
	 * @return Status object
	 */
	protected function fetchRemote( $key, $headers = array() ) {
		global $wgRSSFetchTimeout;

		if ( !is_array( $headers ) ) {
			$headers = array();
		}

		# Conditional request headers.
		# NOTE(review): a "not modified" answer has no body and is not handled
		# specially here; it would be reported as a parse error.
		if ( $this->etag ) {
			wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
			$headers['If-None-Match'] = $this->etag;
		}
		if ( $this->last_modified ) {
			wfDebugLog( 'RSS', 'Used last modified: ' . $this->last_modified );
			$headers['If-Modified-Since'] = $this->last_modified;
		}

		$client = HttpRequest::factory( $this->url, array( 'timeout' => $wgRSSFetchTimeout ) );
		$client->setUserAgent( 'MediaWikiRSS/0.01 (+http://www.mediawiki.org/wiki/Extension:RSS) / MediaWiki RSS extension' );
		# $wgRSSUseGzip is not applied to the request.

		foreach ( $headers as $name => $value ) {
			if ( is_array( $value ) ) {
				# array( name, value ) pair
				if ( count( $value ) > 1 ) {
					$client->setHeader( $value[0], $value[1] );
				}
			} elseif ( is_string( $name ) ) {
				$client->setHeader( $name, $value );
			}
		}

		$fetch = $client->execute();
		$this->client = $client;

		if ( !$fetch->isGood() ) {
			wfDebugLog( 'RSS', 'Request Failed: ' . $fetch->getWikiText() );
			return $fetch;
		}

		return $this->responseToXML( $key );
	}

	/**
	 * Render the displayable items of the loaded feed, up to the maximum.
	 *
	 * @param $parser Parser
	 * @param $frame PPFrame
	 * @return String: concatenated output of renderItem(); '' without a template
	 */
	public function renderFeed( $parser, $frame ) {
		$output = '';
		if ( !$this->itemTemplate ) {
			return $output;
		}

		# Work on a copy so that rendering twice does not flip the order back.
		$items = $this->rss->items;
		if ( $this->reversed ) {
			$items = array_reverse( $items );
		}

		$headcnt = 0;
		foreach ( $items as $item ) {
			if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
				break;
			}

			if ( $this->canDisplay( $item ) ) {
				$output .= $this->renderItem( $item, $parser, $frame );
				$headcnt++;
			}
		}

		return $output;
	}

	/**
	 * Render one item through the item template.
	 *
	 * The template is split on "|"; in every "name = text" part whose name is
	 * a key of $item, the placeholder {{{name}}} is replaced by the item's
	 * value. The re-joined text is parsed with recursiveTagParse().
	 *
	 * NOTE(review): values are inserted as-is, so feed text containing "|" or
	 * "}}" can alter the structure of the template call.
	 *
	 * @param $item Array: item fields keyed by name
	 * @param $parser Parser
	 * @param $frame PPFrame
	 * @return String: parsed output, or '' when the template contains no "|"
	 *   or parser/frame are missing
	 */
	public function renderItem( $item, $parser, $frame ) {
		$parts = explode( '|', $this->itemTemplate );
		if ( count( $parts ) <= 1 || !isset( $parser ) || !isset( $frame ) ) {
			return '';
		}

		$rendered = array();
		foreach ( $parts as $part ) {
			$bits = explode( '=', $part );
			$left = count( $bits ) == 2 ? trim( $bits[0] ) : null;

			if ( $left !== null && isset( $item[$left] ) ) {
				# Plain string replacement: the value comes from the feed and
				# must not be interpreted as a regex replacement pattern.
				$leftValue = str_replace( '{{{' . $left . '}}}', $item[$left], $bits[1] );
				$rendered[] = $left . '=' . $leftValue;
			} else {
				$rendered[] = $part;
			}
		}

		return $parser->recursiveTagParse( implode( ' | ', $rendered ), $frame );
	}

	/**
	 * Parse the body of the last HTTP response ($this->client) into
	 * $this->rss and, on success, remember its caching headers and store the
	 * result in the cache.
	 *
	 * @param $key String: cache key to store the parsed feed under
	 * @return Status object: good on success, fatal 'rss-parse-error' otherwise
	 */
	public function responseToXML( $key ) {
		$content = $this->client->getContent();
		$this->xml = new DOMDocument;

		# loadXML() rejects an empty string outright and reports malformed
		# XML through PHP warnings; collect such errors instead.
		$loaded = false;
		$detail = 'empty response body';
		if ( is_string( $content ) && $content !== '' ) {
			$previous = libxml_use_internal_errors( true );
			$loaded = $this->xml->loadXML( $content );
			if ( !$loaded ) {
				$error = libxml_get_last_error();
				$detail = $error ? trim( $error->message ) : 'malformed XML';
			}
			libxml_clear_errors();
			libxml_use_internal_errors( $previous );
		}
		if ( !$loaded ) {
			return Status::newFatal( 'rss-parse-error', $detail );
		}

		$this->rss = new RSSData( $this->xml );

		if ( !empty( $this->rss->ERROR ) ) {
			return Status::newFatal( 'rss-parse-error', $this->rss->ERROR );
		}

		// RSS parsed successfully
		$this->etag = $this->client->getResponseHeader( 'Etag' );
		$this->last_modified = $this->client->getResponseHeader( 'Last-Modified' );
		$itemCount = is_array( $this->rss->items ) ? count( $this->rss->items ) : 0;
		wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' . $this->last_modified . ') and items (' . $itemCount . ')!' );
		$this->storeInCache( $key );

		return Status::newGood();
	}

	/**
	 * Decide whether an item is shown, based on its description.
	 *
	 * @param $item Array: item fields; only 'description' is examined
	 * @return Boolean: false when a filterOut term matches, or when filter
	 *   terms exist and none of them matches; true otherwise
	 */
	public function canDisplay( $item ) {
		$description = isset( $item['description'] ) ? (string)$item['description'] : '';

		if ( !$this->filter( $description, 'filterOut' ) ) {
			return false;
		}

		# The "filter" attribute only restricts the output when it was given.
		if ( count( $this->filter ) > 0 && !$this->filter( $description, 'filter' ) ) {
			return false;
		}

		return true;
	}

	/**
	 * Check a text against one of the two term lists.
	 *
	 * @param $text String: the text to examine
	 * @param $filterType String: 'filterOut' for the filterOut list, anything
	 *   else for the filter list
	 * @return Boolean: for 'filterOut', true when NO term occurs in the text;
	 *   for the filter list, true when SOME term occurs. An empty list gives
	 *   the "no term occurs" answer.
	 */
	public function filter( $text, $filterType ) {
		if ( $filterType === 'filterOut' ) {
			$keep = false;
			$terms = $this->filterOut;
		} else {
			$keep = true;
			$terms = $this->filter;
		}

		foreach ( $terms as $term ) {
			if ( (string)$term === '' ) {
				continue;
			}
			# Terms are matched literally and case-insensitively.
			if ( preg_match( '/' . preg_quote( $term, '/' ) . '/i', (string)$text ) ) {
				return $keep;
			}
		}

		# Every term has been checked and none matched.
		return !$keep;
	}

	/**
	 * Wrap every word containing one of the highlight terms in a coloured,
	 * bold <span>; colours are assigned to terms in rotation.
	 *
	 * @param $text String
	 * @return String
	 */
	public function highlightTerms( $text ) {
		# Words are first wrapped in placeholder tokens and the tokens are
		# turned into markup afterwards.
		# To avoid trouble should someone wants to highlight the terms "span", "style", …
		$starttag = 'v8x5u3t3u8h';
		$endtag = 'q8n4f6n4n4x';

		$colors = array( 'coral', 'greenyellow', 'lightskyblue', 'gold', 'violet' );
		$colorCount = count( $colors );

		$i = 0;
		foreach ( $this->highlight as $term ) {
			if ( (string)$term === '' ) {
				continue;
			}

			$pattern = '/\b(\w*?' . preg_quote( $term, '/' ) . '\w*?)\b/i';
			$marked = preg_replace( $pattern, $starttag . '_' . $i . '\\1' . $endtag, $text );
			if ( $marked !== null ) {
				$text = $marked;
			}
			$i = ( $i + 1 ) % $colorCount;
		}

		foreach ( $colors as $index => $color ) {
			$text = str_replace(
				$starttag . '_' . $index,
				'<span style="background-color:' . $color . '; font-weight: bold;">',
				$text
			);
		}

		return str_replace( $endtag, '</span>', $text );
	}
}