mediawiki-extensions-Visual.../modules/parser/ext.core.Sanitizer.js
Gabriel Wicke 3f7c1499cd Enable support for general preprocessor functionality in attribute keys and
values. This includes comments, templates and template arguments.

This also replaces the specialized expansion logic in the TemplateHandler. The
removal of link validation lets one more parser test fail for now. External
link target validation will need to be implemented in the token stream handler
for links. This is noted as TODO in
https://www.mediawiki.org/wiki/Future/Parser_development#Token_stream_transforms.
2012-02-08 15:10:30 +00:00

88 lines
2.6 KiB
JavaScript

/*
* General token sanitizer. Strips out (or encapsulates) unsafe and disallowed
* tag types and attributes. Should run last in the third, synchronous
* expansion stage. Tokens from extensions which should not be sanitized
* can bypass sanitization by setting their rank to 3.
*
* @author Gabriel Wicke <gwicke@wikimedia.org>
*/
// Include general utilities
var Util = require('./ext.Util.js').Util,
u = new Util();
/**
 * Token transformer that cleans up tokens in the token stream. The only
 * handler implemented here is onAnchor, which strips ignorable characters
 * from the host part of an anchor's href attribute.
 *
 * @constructor
 * @param {Object} manager Transform manager. It must provide addTransform();
 * onAnchor additionally uses manager.env.lookupKV() and
 * manager.env.tokensToString().
 */
function Sanitizer ( manager ) {
	this.manager = manager;
	this.register( manager );
}

// constants
// Rank passed to addTransform() when registering onAnchor.
Sanitizer.prototype.handledRank = 2.99;
// Not referenced by the code in this file.
Sanitizer.prototype.anyRank = 2.9901;

/**
 * Register this transformer with the TokenTransformer.
 *
 * Adds onAnchor (bound to this instance) as a transform for 'tag' tokens
 * named 'a' at rank handledRank, and remembers the manager on the instance.
 *
 * @param {Object} manager Transform manager providing addTransform().
 */
Sanitizer.prototype.register = function ( manager ) {
	this.manager = manager;
	manager.addTransform( this.onAnchor.bind( this ), this.handledRank, 'tag', 'a' );
};

/**
 * Clean up the href attribute of an anchor token, similar in spirit to
 * Sanitizer::cleanUrl.
 *
 * End tag tokens are passed through untouched. For other tokens the href
 * value (if present) is flattened to a string, split into protocol, host and
 * path, and the host has IDN-ignored characters removed. The token's href
 * key/value object is modified in place.
 *
 * @param {Object} token Anchor tag token with an attribs member.
 * @return {Object} An object of the form { token: token }.
 */
Sanitizer.prototype.onAnchor = function ( token ) {
	if ( token.constructor === EndTagTk ) {
		return { token: token };
	}
	var hrefKV = this.manager.env.lookupKV( token.attribs, 'href' );
	// FIXME: Should the flattening of attributes to text be performed as part
	// of the AttributeTransformManager processing? This certainly is not the
	// right place!
	if ( hrefKV !== null ) {
		hrefKV.v = this.manager.env.tokensToString( hrefKV.v );
		// Declared locally: these used to be assigned without a declaration,
		// which leaked them as globals (and throws in strict mode).
		// Without a '//' in the value, everything is treated as path.
		var proto = '',
			host = '',
			path = hrefKV.v,
			bits = hrefKV.v.match( /(.*?\/\/)([^\/]+)(\/?.*)/ );
		if ( bits ) {
			proto = bits[1];
			host = bits[2];
			path = bits[3];
		}
		hrefKV.v = proto + this._stripIDNs( host ) + path;
	}
	return { token: token };
};

// XXX: We actually need to strip IDN ignored characters in the link text as
// well, so that readers are not misled. This should perhaps happen at an
// earlier stage, while converting links to html.
/**
 * Global regular expression matching the characters removed from host names
 * by _stripIDNs: tab, space and the listed invisible / ignorable code points.
 */
Sanitizer.prototype._IDNRegexp = new RegExp(
	"[\t ]|" + // general whitespace
	"\u00ad|" + // 00ad SOFT HYPHEN
	"\u1806|" + // 1806 MONGOLIAN TODO SOFT HYPHEN
	"\u200b|" + // 200b ZERO WIDTH SPACE
	"\u2060|" + // 2060 WORD JOINER
	"\ufeff|" + // feff ZERO WIDTH NO-BREAK SPACE
	"\u034f|" + // 034f COMBINING GRAPHEME JOINER
	"\u180b|" + // 180b MONGOLIAN FREE VARIATION SELECTOR ONE
	"\u180c|" + // 180c MONGOLIAN FREE VARIATION SELECTOR TWO
	"\u180d|" + // 180d MONGOLIAN FREE VARIATION SELECTOR THREE
	"\u200c|" + // 200c ZERO WIDTH NON-JOINER
	"\u200d|" + // 200d ZERO WIDTH JOINER
	"[\ufe00-\ufe0f]" // fe00-fe0f VARIATION SELECTOR-1-16
	, 'g'
);

/**
 * Remove every character matched by _IDNRegexp from a host string.
 *
 * @param {string} host Host part of a URL.
 * @return {string} The host with all matched characters removed.
 */
Sanitizer.prototype._stripIDNs = function ( host ) {
	return host.replace( this._IDNRegexp, '' );
};
// Export the Sanitizer constructor when a CommonJS-style `module` object is
// available; otherwise do nothing.
if ( typeof module === "object" ) {
	module.exports.Sanitizer = Sanitizer;
}