2012-01-18 01:42:56 +00:00
|
|
|
/*
 * General token sanitizer. Strips out (or encapsulates) unsafe and disallowed
 * tag types and attributes. Should run last in the third, synchronous
 * expansion stage. Tokens from extensions which should not be sanitized
 * can bypass sanitization by setting their rank to 3.
 *
 * @author Gabriel Wicke <gwicke@wikimedia.org>
 */
|
|
|
|
|
|
|
|
// Include general utilities
|
|
|
|
var Util = require('./ext.Util.js').Util,
|
|
|
|
u = new Util();
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Token sanitizer. Constructing an instance stores the transform manager
 * and immediately registers the sanitizer's handlers with it.
 *
 * @constructor
 * @param {Object} manager Transform manager; must provide addTransform().
 */
function Sanitizer ( manager ) {
	// Kept here as well as in register() so the reference exists even if
	// register() is overridden.
	this.manager = manager;
	this.register( manager );
}
|
|
|
|
|
|
|
|
// Rank constants passed to manager.addTransform() in register(). Both sit
// just below 3, the rank that tokens use to bypass sanitization.

// Rank of the anchor ('a' tag) handler.
Sanitizer.prototype.handledRank = 2.99;

// Rank of the catch-all handler; marginally higher than handledRank.
Sanitizer.prototype.anyRank = 2.9901;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Register this transformer with the TokenTransformer.
 *
 * Installs onAnchor for 'a' tags at handledRank and onAny for all tokens at
 * anyRank, both bound to this instance.
 *
 * @param {Object} manager Transform manager; must provide addTransform().
 */
Sanitizer.prototype.register = function ( manager ) {
	this.manager = manager;

	var anchorHandler = this.onAnchor.bind( this );
	manager.addTransform( anchorHandler, this.handledRank, 'tag', 'a' );

	var anyHandler = this.onAny.bind( this );
	manager.addTransform( anyHandler, this.anyRank, 'any' );
};
|
|
|
|
|
|
|
|
/**
 * Clean up the href of an anchor token; performs something similar to
 * Sanitizer::cleanUrl.
 *
 * The href value is flattened to a string, split into protocol, host and
 * path, and the host part is passed through _stripIDNs(). The href
 * attribute is rewritten in place. End tags and anchors without an href are
 * returned untouched.
 *
 * @param {Object} token Anchor token; its href attribute is modified in place.
 * @returns {Object} { token: token }
 */
Sanitizer.prototype.onAnchor = function ( token ) {
	// Declared locally: these used to be assigned without a declaration,
	// which created globals (and throws in strict mode / ES modules).
	var hrefKV, bits, proto, host, path;

	if ( token.constructor === EndTagTk ) {
		return { token: token };
	}

	hrefKV = this.manager.env.lookupKV( token.attribs, 'href' );
	// FIXME: Should the flattening of attributes to text be performed as part
	// of the AttributeTransformManager processing? This certainly is not the
	// right place!
	if ( hrefKV !== null && hrefKV !== undefined ) {
		hrefKV.v = this.manager.env.tokensToString( hrefKV.v );

		// <anything up to and including '//'> <host> <rest>
		bits = hrefKV.v.match( /(.*?\/\/)([^\/]+)(\/?.*)/ );
		if ( bits ) {
			proto = bits[1];
			host = bits[2];
			path = bits[3];
		} else {
			// No '//' separator: treat the whole value as a path.
			proto = '';
			host = '';
			path = hrefKV.v;
		}

		// Only the host is stripped; the path is left as-is.
		host = this._stripIDNs( host );
		hrefKV.v = proto + host + path;
	}
	return { token: token };
};
|
|
|
|
|
|
|
|
// XXX: We actually need to strip IDN ignored characters in the link text as
// well, so that readers are not misled. This should perhaps happen at an
// earlier stage, while converting links to html.
//
// Global regexp matching every character that _stripIDNs() removes from a
// host name. The alternatives are joined with '|'.
Sanitizer.prototype._IDNRegexp = new RegExp(
	[
		"[\t ]", // general whitespace
		"\u00ad", // 00ad SOFT HYPHEN
		"\u1806", // 1806 MONGOLIAN TODO SOFT HYPHEN
		"\u200b", // 200b ZERO WIDTH SPACE
		"\u2060", // 2060 WORD JOINER
		"\ufeff", // feff ZERO WIDTH NO-BREAK SPACE
		"\u034f", // 034f COMBINING GRAPHEME JOINER
		"\u180b", // 180b MONGOLIAN FREE VARIATION SELECTOR ONE
		"\u180c", // 180c MONGOLIAN FREE VARIATION SELECTOR TWO
		"\u180d", // 180d MONGOLIAN FREE VARIATION SELECTOR THREE
		"\u200c", // 200c ZERO WIDTH NON-JOINER
		"\u200d", // 200d ZERO WIDTH JOINER
		"[\ufe00-\ufe0f]" // fe00-fe0f VARIATION SELECTOR-1-16
	].join( '|' ),
	'g'
);
|
|
|
|
|
|
|
|
/**
 * Remove every character matched by _IDNRegexp from a host name.
 *
 * @param {string} host Host part of a URL.
 * @returns {string} The host with all matched characters removed.
 */
Sanitizer.prototype._stripIDNs = function ( host ) {
	var ignoredChars = this._IDNRegexp,
		stripped = host.replace( ignoredChars, '' );
	return stripped;
};
|
|
|
|
|
|
|
|
|
|
|
|
|
2012-02-13 17:02:23 +00:00
|
|
|
/**
 * Sanitize any tag.
 *
 * Attribute keys and values that are still token arrays are flattened to
 * strings with env.tokensToString(), and the value of every style attribute
 * is passed through checkCss(). Attributes are modified in place.
 *
 * XXX: Make attribute sanitation reversible by storing round-trip info in
 * token.dataAttribs object (which is serialized as JSON in a data-mw-rt
 * attribute in the DOM).
 *
 * @param {Object} token Token to sanitize; tokens without attribs pass through.
 * @returns {Object} { token: token }
 */
Sanitizer.prototype.onAny = function ( token ) {
	// XXX: validate token type according to whitelist and convert non-ok ones
	// back to text.

	var attribs = token.attribs,
		kv, i, l;

	// Convert attributes to string, if necessary.
	// XXX: Likely better done in AttributeTransformManager when processing is
	// complete
	if ( attribs ) {
		for ( i = 0, l = attribs.length; i < l; i++ ) {
			kv = attribs[i];
			// Array.isArray rather than a .constructor check, so that a
			// missing (null / undefined) key or value does not throw.
			if ( Array.isArray( kv.k ) ) {
				kv.k = this.manager.env.tokensToString( kv.k );
			}
			if ( Array.isArray( kv.v ) ) {
				kv.v = this.manager.env.tokensToString( kv.v );
			}
			// HTML attribute names are case-insensitive, so 'STYLE' or a
			// padded ' style ' must not slip past the CSS check.
			if ( typeof kv.k === 'string' &&
					kv.k.trim().toLowerCase() === 'style' ) {
				kv.v = this.checkCss( kv.v );
			}
		}
	}
	// XXX: Validate attributes
	return { token: token };
};
|
2012-01-18 01:42:56 +00:00
|
|
|
|
2012-02-18 20:16:23 +00:00
|
|
|
/**
 * Check the value of a style attribute for unsafe content.
 *
 * The value is rejected when it contains control characters or one of the
 * keywords `expression`, `filter:`, `accelerator:` or `url(`. The checks run
 * on the raw value first and then on a normalized copy with CSS comments
 * removed and CSS escape sequences decoded, so that obfuscated spellings
 * such as `expr/**\/ession(` or `\65xpression(` are caught as well.
 *
 * @param {string} value Raw style attribute value.
 * @returns {string} The unchanged value if it passes, otherwise a CSS
 *   comment naming the reason ('/* invalid control char *\/' or
 *   '/* insecure input *\/').
 */
Sanitizer.prototype.checkCss = function ( value ) {
		// Same ranges as the octal form [\000-\010\016-\037\177].
	var controlChars = /[\x00-\x08\x0e-\x1f\x7f]/,
		insecure = /expression|filter\s*:|accelerator\s*:|url\s*\(/i,
		normalized;

	// Raw checks first, in the original order, so every value that was
	// rejected before is still rejected with the same message.
	if ( controlChars.test( value ) ) {
		return '/* invalid control char */';
	}
	if ( insecure.test( value ) ) {
		return '/* insecure input */';
	}

	normalized = String( value )
		// Drop comments (an unterminated one runs to the end of the value)
		// without leaving a gap, so split keywords are rejoined.
		.replace( /\/\*[\s\S]*?(?:\*\/|$)/g, '' )
		// Decode CSS escapes: backslash + 1-6 hex digits + optional single
		// whitespace, or backslash + any other character.
		.replace( /\\(?:([0-9a-f]{1,6})[ \t\r\n\f]?|(\r\n|[\s\S]))/gi,
			function ( match, hex, chr ) {
				var code;
				if ( hex !== undefined ) {
					code = parseInt( hex, 16 );
					// Only used for keyword detection, so anything outside
					// the BMP can collapse to the replacement character.
					return code <= 0xffff ?
						String.fromCharCode( code ) : '\ufffd';
				}
				// Backslash-newline is a line continuation; any other
				// escaped character stands for itself.
				return ( /^[\r\n\f]/ ).test( chr ) ? '' : chr;
			} );

	if ( controlChars.test( normalized ) ) {
		return '/* invalid control char */';
	}
	if ( insecure.test( normalized ) ) {
		return '/* insecure input */';
	}
	return value;
};
|
|
|
|
|
2012-01-18 01:42:56 +00:00
|
|
|
// Export the constructor when a CommonJS `module` object is available.
if ( typeof module === 'object' ) {
	module.exports.Sanitizer = Sanitizer;
}
|