2010-11-04 23:19:00 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
 * Fetches a remote RSS/ATOM feed (with memcached-backed caching), filters its
 * items and renders each of them through a wikitext item template.
 */
class RSSParser {
	/** Maximum number of items to render; a value <= 0 disables the limit. */
	protected $maxheads = 32;

	/** date() format string used for the {{{date}}} placeholder. */
	protected $date = "Y-m-d H:i:s";

	/** Maximum length, in characters, of an escaped item field before it is cut off. */
	protected $ItemMaxLength = 200;

	/** Whether the feed items are rendered in reverse order. */
	protected $reversed = false;

	/** Terms to highlight in the rendered item fields. */
	protected $highlight = array();

	/** An item is only displayed if it matches one of these terms (empty: no restriction). */
	protected $filter = array();

	/** An item is never displayed if it matches one of these terms. */
	protected $filterOut = array();

	/** Wikitext template with {{{field}}} placeholders that every item is rendered with. */
	protected $itemTemplate;

	/** URL of the feed. */
	protected $url;

	/** ETag response header of the last successful fetch (or of the cached copy). */
	protected $etag;

	/** Unix timestamp parsed from the Last-Modified response header. */
	protected $lastModified;

	/** DOMDocument the fetched feed was loaded into. */
	protected $xml;

	protected $error;

	/** Item fields whose concatenated text is matched against filter / filterOut. */
	protected $displayFields = array( 'author', 'title', 'encodedContent', 'description' );

	/**
	 * @var RSSData
	 */
	public $rss;

	/** HTTP request object used by the last call to fetchRemote(). */
	public $client;

	/**
	 * Convenience function that takes a space-separated string and returns an array of words.
	 *
	 * Empty words (caused by leading, trailing or repeated spaces, or by an
	 * empty string) are dropped: an empty term would otherwise turn the
	 * filter / highlight regular expressions into patterns that match everything.
	 *
	 * @param $str String: list of words
	 * @return Array words found
	 */
	private static function explodeOnSpaces( $str ) {
		$found = preg_split( '# +#', $str, -1, PREG_SPLIT_NO_EMPTY );
		return is_array( $found ) ? $found : array();
	}

	/**
	 * Take a bit of WikiText that looks like
	 *   <rss max=5>http://example.com/</rss>
	 * and return an object that can produce rendered output.
	 *
	 * @param $url String: URL of the feed
	 * @param $args Array: tag attributes; the keys read here are max, reverse,
	 *   date, highlight, filter, filterout, item-max-length, template and
	 *   templatename
	 */
	function __construct( $url, $args ) {
		global $wgRSSDateDefaultFormat, $wgRSSItemMaxLength;

		$this->url = $url;

		# Get max number of headlines from argument-array.
		# Non-numeric values are ignored so they never reach the numeric
		# comparison in renderFeed().
		if ( isset( $args['max'] ) && is_numeric( $args['max'] ) ) {
			$this->maxheads = (int)$args['max'];
		}

		# Get reverse flag from argument array
		if ( isset( $args['reverse'] ) ) {
			$this->reversed = true;
		}

		# Get date format from argument array
		# or use a default value
		if ( isset( $args['date'] ) ) {
			$this->date = $args['date'];
		} elseif ( isset( $wgRSSDateDefaultFormat ) ) {
			$this->date = $wgRSSDateDefaultFormat;
		}

		# Get highlight terms from argument array.
		# The terms are kept as written; RSSHighlighter::setTerms() lowercases
		# them and the highlighting regex is case insensitive.
		if ( isset( $args['highlight'] ) ) {
			$this->highlight = self::explodeOnSpaces( $args['highlight'] );
		}

		# Get filter terms from argument array
		if ( isset( $args['filter'] ) ) {
			$this->filter = self::explodeOnSpaces( $args['filter'] );
		}

		# Get a maximal length for item texts.
		# A non-numeric attribute falls through to the global default so that
		# mb_substr() in renderItem() always receives a usable length.
		if ( isset( $args['item-max-length'] ) && is_numeric( $args['item-max-length'] ) ) {
			$this->ItemMaxLength = (int)$args['item-max-length'];
		} elseif ( isset( $wgRSSItemMaxLength ) && is_numeric( $wgRSSItemMaxLength ) ) {
			$this->ItemMaxLength = $wgRSSItemMaxLength;
		}

		if ( isset( $args['filterout'] ) ) {
			$this->filterOut = self::explodeOnSpaces( $args['filterout'] );
		}

		// 'template' is the pagename of a user's itemTemplate including
		// a further pagename for the feedTemplate
		// In that way everything is handled via these two pages
		// and no default pages or templates are used.

		// 'templatename' is an optional pagename of a user's feedTemplate
		// In that way it substitutes $1 (default: RSSPost) in MediaWiki:Rss-item

		if ( isset( $args['template'] ) ) {
			$itemTemplateTitleObject = Title::newFromText( $args['template'], NS_TEMPLATE );
			// Title::newFromText() yields null for an invalid page name. In that
			// case the item template stays unset and renderFeed() returns an
			// empty string instead of failing inside the Article constructor.
			if ( $itemTemplateTitleObject !== null ) {
				$itemTemplateArticleObject = new Article( $itemTemplateTitleObject, 0 );
				$this->itemTemplate = $itemTemplateArticleObject->fetchContent();
			}
		} else {
			if ( isset( $args['templatename'] ) ) {
				$feedTemplatePagename = $args['templatename'];
			} else {
				// compatibility patch for rss extension
				$feedTemplatePagename = 'RSSPost';
				$feedTemplateTitleObject = Title::newFromText( $feedTemplatePagename, NS_TEMPLATE );

				if ( !$feedTemplateTitleObject->exists() ) {
					$feedTemplatePagename = Title::makeTitleSafe( NS_MEDIAWIKI, 'Rss-feed' );
				}
			}

			// MediaWiki:Rss-item = {{ feedTemplatePagename | title = {{{title}}} | ... }}

			// if the attribute parameter templatename= is not present
			// then it defaults to
			// {{ Template:RSSPost | title = {{{title}}} | ... }} - if Template:RSSPost exists from pre-1.9 versions
			// {{ MediaWiki:Rss-feed | title = {{{title}}} | ... }} - otherwise

			$this->itemTemplate = wfMsgNoTrans( 'rss-item', $feedTemplatePagename );
		}
	}

	/**
	 * Make the feed for the configured URL available in $this->rss, using the
	 * cache where possible.
	 *
	 * Flow:
	 * 1. check the cache
	 * 2. if there is a hit that loadFromCache() accepts as fresh, use it and
	 *    do not contact the remote server
	 * 3. otherwise fetch the feed from the remote server
	 *
	 * @return Status object; fatal if no URL is set or the remote fetch failed
	 */
	function fetch() {
		if ( !isset( $this->url ) ) {
			return Status::newFatal( 'rss-fetch-nourl' );
		}

		$key = wfMemcKey( 'rss', $this->url );
		$cachedFeed = $this->loadFromCache( $key );
		if ( $cachedFeed !== false ) {
			wfDebugLog( 'RSS', 'Outputting cached feed for ' . $this->url );
			return Status::newGood();
		}
		wfDebugLog( 'RSS', 'Cache Failed, fetching ' . $this->url . ' from remote.' );

		$status = $this->fetchRemote( $key );
		return $status;
	}

	/**
	 * Retrieve the feed data for the URL from the cache.
	 *
	 * On a usable hit the cached RSS data, etag and last-modified time are
	 * copied into this object even when the entry is then judged too old, so
	 * that fetchRemote() can send them as conditional request headers.
	 *
	 * @param $key String: lookup key to associate with this item
	 * @return boolean true if fresh cached data was loaded, false otherwise
	 */
	protected function loadFromCache( $key ) {
		global $wgMemc, $wgRSSCacheCompare;

		$data = $wgMemc->get( $key );
		if ( !is_array( $data ) ) {
			return false;
		}

		list( $etag, $lastModified, $rss ) = $data;

		if ( !isset( $rss->items ) ) {
			return false;
		}

		wfDebugLog( 'RSS', "Got '$key' from cache" );

		# Now that we've verified that we got useful data, keep it around.
		$this->rss = $rss;
		$this->etag = $etag;
		$this->lastModified = $lastModified;

		// We only care if $wgRSSCacheCompare is > 0
		if ( $wgRSSCacheCompare && time() - $wgRSSCacheCompare > $lastModified ) {
			wfDebugLog( 'RSS', 'Content is old enough that we need to check cached content' );
			return false;
		}

		return true;
	}

	/**
	 * Store these objects (i.e. etag, lastModified, and RSS) in the cache.
	 *
	 * @param $key String: lookup key to associate with this item
	 * @return boolean false if there is no RSS data to store; true once the
	 *   store was attempted (the cache's own result is only logged)
	 */
	protected function storeInCache( $key ) {
		global $wgMemc, $wgRSSCacheAge;

		if ( !isset( $this->rss ) ) {
			return false;
		}
		$r = $wgMemc->set( $key,
			array( $this->etag, $this->lastModified, $this->rss ),
			$wgRSSCacheAge );

		wfDebugLog( 'RSS', "Stored '$key' as in cache? $r");
		return true;
	}

	/**
	 * Retrieve a feed from the remote server.
	 *
	 * If an etag or last-modified time is known (from a stale cache entry),
	 * it is sent as If-None-Match / If-Modified-Since.
	 *
	 * @param $key String: cache key the parsed response is stored under
	 * @param $headers Array: headers to send along with the request
	 * @return Status object
	 */
	protected function fetchRemote( $key, array $headers = array()) {
		global $wgRSSFetchTimeout, $wgRSSUserAgent, $wgRSSProxy,
			$wgRSSUrlNumberOfAllowedRedirects;

		if ( $this->etag ) {
			wfDebugLog( 'RSS', 'Used etag: ' . $this->etag );
			$headers['If-None-Match'] = $this->etag;
		}
		if ( $this->lastModified ) {
			$lm = gmdate( 'r', $this->lastModified );
			wfDebugLog( 'RSS', "Used last modified: $lm" );
			$headers['If-Modified-Since'] = $lm;
		}

		/**
		 * 'noProxy' can conditionally be set as shown in the commented
		 * example below; in HttpRequest 'noProxy' takes precedence over
		 * any value of 'proxy' and disables the use of a proxy.
		 *
		 * This is useful if you run the wiki in an intranet and need to
		 * access external feed urls through a proxy but internal feed
		 * urls must be accessed without a proxy.
		 *
		 * The general handling of such cases will be subject of a
		 * forthcoming version.
		 */

		$url = $this->url;
		$noProxy = !isset( $wgRSSProxy );

		// Example for disabling proxy use for certain urls
		// $noProxy = preg_match( '!\.internal\.example\.com$!i', parse_url( $url, PHP_URL_HOST ) );

		/**
		 * Request options used below (descriptions copied from HttpFunctions.php):
		 *    - timeout             Timeout length in seconds
		 *    - proxy               The proxy to use.
		 *                          Otherwise it will use $wgHTTPProxy (if set)
		 *                          Otherwise it will use the environment variable "http_proxy" (if set)
		 *    - noProxy             Don't use any proxy at all. Takes precedence over proxy value(s).
		 *    - maxRedirects        Maximum number of redirects to follow (defaults to 5)
		 *    - followRedirects     Whether to follow redirects (defaults to false).
		 *                          Note: this should only be used when the target URL is trusted,
		 *                          to avoid attacks on intranet services accessible by HTTP.
		 *    - userAgent           A user agent, if you want to override the default
		 *                          MediaWiki/$wgVersion
		 */

		if ( isset( $wgRSSUrlNumberOfAllowedRedirects )
			&& is_numeric( $wgRSSUrlNumberOfAllowedRedirects ) ) {
			$maxRedirects = $wgRSSUrlNumberOfAllowedRedirects;
		} else {
			$maxRedirects = 0;
		}

		// we set followRedirects intentionally to true to see error messages
		// in cases where the maximum number of redirects is reached
		$client = HttpRequest::factory( $url,
			array(
				'timeout'         => $wgRSSFetchTimeout,
				'followRedirects' => true,
				'maxRedirects'    => $maxRedirects,
				'proxy'           => $wgRSSProxy,
				'noProxy'         => $noProxy,
				'userAgent'       => $wgRSSUserAgent,
			)
		);

		foreach ( $headers as $header => $value ) {
			$client->setHeader( $header, $value );
		}

		$fetch = $client->execute();
		$this->client = $client;

		if ( !$fetch->isGood() ) {
			// wfDebugLog() takes the log group first; wfDebug() would have
			// treated 'RSS' as the message itself.
			wfDebugLog( 'RSS', 'Request Failed: ' . $fetch->getWikiText() );
			return $fetch;
		}

		$ret = $this->responseToXML( $key );
		return $ret;
	}

	/**
	 * Render the entire feed so that each item is passed to the
	 * template which the MediaWiki then displays.
	 *
	 * At most $maxheads displayable items are rendered. The stored feed data
	 * is not modified, so repeated calls produce the same output.
	 *
	 * @param $parser the parser param to pass to recursiveTagParse()
	 * @param $frame the frame param to pass to recursiveTagParse()
	 * @return String rendered feed; empty if the template, parser or frame is missing
	 */
	function renderFeed( $parser, $frame ) {
		$renderedFeed = '';

		if ( !isset( $this->itemTemplate ) || !isset( $parser ) || !isset( $frame ) ) {
			return $renderedFeed;
		}

		// Work on a local copy: reversing $this->rss->items in place would flip
		// the order back again on every second call.
		$items = array();
		if ( isset( $this->rss->items ) && is_array( $this->rss->items ) ) {
			$items = $this->rss->items;
		}
		if ( $this->reversed ) {
			$items = array_reverse( $items );
		}

		$headcnt = 0;
		foreach ( $items as $item ) {
			if ( $this->maxheads > 0 && $headcnt >= $this->maxheads ) {
				// Limit reached; nothing further can be rendered.
				break;
			}

			if ( $this->canDisplay( $item ) ) {
				$renderedFeed .= $this->renderItem( $item ) . "\n";
				$headcnt++;
			}
		}

		return $parser->recursiveTagParse( $renderedFeed, $frame );
	}

	/**
	 * Render a single item by substituting its fields into the item template
	 * and applying any highlighting.
	 *
	 * {{{link}}} is filled from the item's 'id' (ATOM) if present, otherwise
	 * from its 'link' (RSS). {{{date}}} is formatted in UTC with $this->date.
	 * Every other field is escaped, cut to $ItemMaxLength characters and
	 * highlighted. Placeholders without a matching field are removed.
	 *
	 * @param $item Array: an array produced by RSSData where keys are the names of the RSS elements
	 * @return String the filled-in template
	 */
	protected function renderItem( $item ) {
		$renderedItem = $this->itemTemplate;

		// $info will only be an XML element name, so we're safe using it.
		// $item[$info] is handled by the XML parser --
		// and that means bad RSS with stuff like
		// <description><script>alert("hi")</script></description> will find its
		// rogue <script> tags neutered.
		// use the overloaded multi byte wrapper functions in GlobalFunctions.php

		foreach ( array_keys( $item ) as $info ) {
			switch ( $info ) {
			// ATOM <id> elements and RSS <link> elements are item link urls
			case 'id':
				$txt = $this->sanitizeUrl( $item['id'] );
				$renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
				break;
			case 'link':
				// When an <id> exists it provides the link (see above). Doing
				// the replacement here anyway would insert whatever text an
				// earlier field left behind in $txt.
				if ( !isset( $item['id'] ) ) {
					$txt = $this->sanitizeUrl( $item['link'] );
					$renderedItem = str_replace( '{{{link}}}', $txt, $renderedItem );
				}
				break;
			case 'date':
				$escapedDate = $this->escapeTemplateParameter( $item['date'] );
				$tempTimezone = date_default_timezone_get();
				date_default_timezone_set( 'UTC' );
				$timestamp = strtotime( $escapedDate );
				// An unparsable date is shown as delivered (escaped) rather
				// than as the Unix epoch.
				$txt = ( $timestamp === false )
					? $escapedDate
					: date( $this->date, $timestamp );
				date_default_timezone_set( $tempTimezone );
				$renderedItem = str_replace( '{{{date}}}', $txt, $renderedItem );
				break;
			default:
				$str = $this->escapeTemplateParameter( $item[$info] );
				// NOTE(review): the cut is applied to the escaped text, so it can
				// presumably split an entity such as &#91; in the middle - confirm
				// whether truncating before escaping would be acceptable.
				if ( mb_strlen( $str ) > $this->ItemMaxLength ) {
					$str = mb_substr( $str, 0, $this->ItemMaxLength ) . " ...";
				}
				$txt = $this->highlightTerms( $str );
				$renderedItem = str_replace( '{{{' . $info . '}}}', $txt, $renderedItem );
			}
		}

		// nullify all remaining info items in the template
		// without a corresponding info in the current feed item

		$renderedItem = preg_replace( "!{{{[^}]+}}}!U", "", $renderedItem );

		return $renderedItem;
	}

	/**
	 * Sanitize a URL for inclusion in wikitext. Escapes characters that have
	 * a special meaning in wikitext, replacing them with URL escape codes, so
	 * that arbitrary input can be included as a free or bracketed external
	 * link and both work and be safe.
	 *
	 * @param $url String: URL taken from the feed
	 * @return String URL without control characters and with < > " [ | ] \
	 *   space and { percent-encoded
	 */
	protected function sanitizeUrl( $url ) {
		# Remove control characters
		$url = preg_replace( '/[\000-\037\177]/', '', $url );

		# Escape other problematic characters
		$out = '';
		$length = strlen( $url );
		$i = 0;
		while ( $i < $length ) {
			// Copy the run of harmless characters starting at $i in one go ...
			$boringLength = strcspn( $url, '<>"[|]\ {', $i );
			$out .= substr( $url, $i, $boringLength );
			$i += $boringLength;
			// ... then percent-encode the problematic character that ended it.
			if ( $i < $length ) {
				$out .= rawurlencode( $url[$i] );
				$i++;
			}
		}
		return $out;
	}

	/**
	 * Sanitize user input for inclusion as a template parameter.
	 *
	 * Unlike in wfEscapeWikiText() as of r77127, this escapes }} in addition
	 * to the other kinds of markup, to avoid user input ending a template
	 * invocation.
	 *
	 * We change differently flavoured <p> and <br> tags to effective <br> tags,
	 * <b> and <i> to wikitext bold / italic, and keep <u> and <s>;
	 * other tags such as <a> will be rendered html-escaped.
	 *
	 * @param $text String: raw text taken from the feed
	 * @return String text with newlines removed, HTML escaped and wikitext
	 *   markup replaced by character references
	 */
	protected function escapeTemplateParameter( $text ) {
		// Step 1: HTML-escape, then neutralise wikitext markup by replacing it
		// with numeric character references.
		$text = str_replace(
			array( '[', '|', ']', '\'', 'ISBN ',
				'RFC ', '://', "\n=", '{{', '}}',
			),
			array( '&#91;', '&#124;', '&#93;', '&#39;', 'ISBN&#32;',
				'RFC&#32;', '&#58;//', "\n&#61;", '&#123;&#123;', '&#125;&#125;',
			),
			htmlspecialchars( str_replace( "\n", "", $text ) )
		);

		// Step 2: keep some basic layout tags. They are HTML-escaped at this
		// point, so the escaped forms are what has to be searched for.
		$text = str_replace(
			array( '&lt;p&gt;', '&lt;/p&gt;',
				'&lt;br/&gt;', '&lt;br&gt;', '&lt;/br&gt;',
				'&lt;b&gt;', '&lt;/b&gt;',
				'&lt;i&gt;', '&lt;/i&gt;',
				'&lt;u&gt;', '&lt;/u&gt;',
				'&lt;s&gt;', '&lt;/s&gt;',
			),
			array( "", "<br/>",
				"<br/>", "<br/>", "<br/>",
				"'''", "'''",
				"''", "''",
				"<u>", "</u>",
				"<s>", "</s>",
			),
			$text
		);

		return $text;
	}

	/**
	 * Parse the HTTP response held by $this->client into RSS data and cache it.
	 *
	 * A 304 (not modified) response only re-stores the data already loaded
	 * from the cache. Any other response is parsed as XML into $this->rss;
	 * on success the etag and Last-Modified header are remembered and
	 * everything is stored in the cache.
	 *
	 * @param $key String: the key to use to store the parsed response in the cache
	 * @return Status object; fatal ('rss-parse-error') if the body is empty
	 *   or RSSData reports an error
	 */
	protected function responseToXML( $key ) {
		wfDebugLog( 'RSS', "Got '" . $this->client->getStatus() . "', updating cache for $key" );
		if ( $this->client->getStatus() === 304 ) {
			# Not modified, update cache
			wfDebugLog( 'RSS', "Got 304, updating cache for $key" );
			$this->storeInCache( $key );
		} else {
			$this->xml = new DOMDocument;
			$raw_xml = $this->client->getContent();

			if( $raw_xml == '' ) {
				return Status::newFatal( 'rss-parse-error', 'No XML content' );
			}

			// Malformed XML makes loadXML() emit warnings; parse problems are
			// picked up through the RSSData error check below instead.
			wfSuppressWarnings();
			$this->xml->loadXML( $raw_xml );
			wfRestoreWarnings();

			$this->rss = new RSSData( $this->xml );

			// if RSS parsed successfully
			if ( $this->rss && !$this->rss->error ) {
				$this->etag = $this->client->getResponseHeader( 'Etag' );
				$this->lastModified =
					strtotime( $this->client->getResponseHeader( 'Last-Modified' ) );

				wfDebugLog( 'RSS', 'Stored etag (' . $this->etag . ') and Last-Modified (' .
					$this->client->getResponseHeader( 'Last-Modified' ) . ') and items (' .
					count( $this->rss->items ) . ')!' );
				$this->storeInCache( $key );
			} else {
				return Status::newFatal( 'rss-parse-error', $this->rss->error );
			}
		}
		return Status::newGood();
	}

	/**
	 * Determine if a given item should or should not be displayed.
	 *
	 * The displayable fields of the item are concatenated; the item is
	 * rejected if that text matches a filterOut term, and otherwise accepted
	 * if it passes the filter terms.
	 *
	 * @param $item Array: associative array that RSSData produced for an <item>
	 * @return boolean
	 */
	protected function canDisplay( array $item ) {
		$check = '';

		/* We're only going to check the displayable fields */
		foreach ( $this->displayFields as $field ) {
			if ( isset( $item[$field] ) ) {
				$check .= $item[$field];
			}
		}

		if ( $this->filter( $check, 'filterOut' ) ) {
			return false;
		}
		if ( $this->filter( $check, 'filter' ) ) {
			return true;
		}
		return false;
	}

	/**
	 * Filters items in or out if they match a string we're looking for.
	 *
	 * With an empty term list the result is false for 'filterOut' (nothing
	 * is excluded) and true otherwise (everything is included).
	 *
	 * @param $text String: the text to examine
	 * @param $filterType String: "filterOut" to check for matches in the
	 *				filterOut member list.
	 *				Otherwise, uses the filter member list.
	 * @return Boolean: whether the text matches one of the terms (case insensitive).
	 */
	protected function filter( $text, $filterType ) {
		if ( $filterType === 'filterOut' ) {
			$filter = $this->filterOut;
		} else {
			$filter = $this->filter;
		}

		if ( count( $filter ) == 0 ) {
			return $filterType !== 'filterOut';
		}

		/* Using : for delimiter here since it'll be quoted automatically. */
		$match = preg_match( ':(' . implode( '|', array_map( 'preg_quote', $filter ) ) . '):i', $text ) ;
		if ( $match ) {
			return true;
		}
		return false;
	}

	/**
	 * Highlight the words we're supposed to be looking for.
	 *
	 * @param $text String: the text to look in.
	 * @return String with matched text highlighted in a <span> element
	 */
	protected function highlightTerms( $text ) {
		if ( count( $this->highlight ) === 0 ) {
			return $text;
		}

		RSSHighlighter::setTerms( $this->highlight );
		$highlight = ':'. implode( '|', array_map( 'preg_quote', array_values( $this->highlight ) ) ) . ':i';
		return preg_replace_callback( $highlight, 'RSSHighlighter::highlightThis', $text );
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Callback helper for RSSParser::highlightTerms(): wraps matched terms in a
 * coloured <span>, picking the colour from the term's position in the list.
 */
class RSSHighlighter {
	/** Map of lowercased term => position of that term in the list given to setTerms(). */
	static $terms = array();

	/**
	 * Set the list of terms to match for the next highlighting session.
	 *
	 * @param $terms Array: list of words to match.
	 */
	static function setTerms( array $terms ) {
		$lowercased = array_map( 'strtolower', $terms );
		self::$terms = array_flip( $lowercased );
	}

	/**
	 * Wrap one matched word in HTML code that highlights it.
	 *
	 * @param $match Array: regex match as passed by preg_replace_callback();
	 *               element 0 is the matched word.
	 *               The colours depend on the position the word had in the
	 *               list supplied to setTerms(), cycling through the palette.
	 * @return String word wrapped in HTML code.
	 */
	static function highlightThis( $match ) {
		# bg colors cribbed from Google's highlighting of search terms;
		# the fg colors are spelled out instead of being computed.
		$palette = array(
			array( '255, 255, 102', 'black' ),
			array( '160, 255, 255', 'black' ),
			array( '153, 255, 153', 'black' ),
			array( '255, 153, 153', 'black' ),
			array( '255, 102, 255', 'black' ),
			array( '136, 0, 0', 'white' ),
			array( '0, 170, 0', 'white' ),
			array( '136, 104, 0', 'white' ),
			array( '0, 70, 153', 'white' ),
			array( '153, 0, 153', 'white' ),
		);

		$word = $match[0];
		$slot = self::$terms[strtolower( $word )] % count( $palette );
		list( $background, $foreground ) = $palette[$slot];

		$openingTag = sprintf(
			"<span style='font-weight: bold; background: none repeat scroll 0%% 0%% rgb(%s); color: %s;'>",
			$background,
			$foreground
		);

		return $openingTag . $word . '</span>';
	}
}
|
2012-02-23 21:12:54 +00:00
|
|
|
|
|
|
|
/**
 * Static helpers for the RSS extension.
 */
class RSSUtils {

	/**
	 * Output an error message, all wrapped up nicely in an error <span>.
	 *
	 * @param String $errorMessageName The system message that describes this error
	 * @param String|Array $param Error parameter (or parameters)
	 * @return String Html that is the error.
	 */
	public static function RSSError( $errorMessageName, $param = false ) {
		// Anything from a parser tag should use the content language for its
		// messages, since the cache doesn't vary by user language; hence
		// ->inContentLanguage().
		$message = wfMessage( $errorMessageName )->inContentLanguage()->params( $param );

		// The ->parse() part makes everything safe from an escaping standpoint,
		// which is why the result may be passed to rawElement().
		$errorHtml = "Extension:RSS -- Error: " . $message->parse();

		return Html::rawElement( 'span', array( 'class' => 'error' ), $errorHtml );
	}

}
|