mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-09-27 12:16:51 +00:00
bd98eb4c5a
The TokenTransformDispatcher now actually implements an asynchronous, phased token transformation framework as described in https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations. Additionally, the parser pipeline is now mostly held together using events. The tokenizer still emits a lame single event with all tokens, as block-level emission failed with scoping issues specific to the PEGJS parser generator. All stages clean up when receiving the end tokens, so that the full pipeline can be used for repeated parsing. The QuoteTransformer is not yet 100% fixed to work with the new interface, and the Cite extension is disabled for now pending adaptation. Bold-italic related tests are failing currently.
116 lines
3.1 KiB
JavaScript
116 lines
3.1 KiB
JavaScript
/* Front-end/Wrapper for a particular tree builder, in this case the
|
|
* parser/tree builder from the node 'html5' module. Feed it tokens using
|
|
* processToken, and it will build you a DOM tree retrievable using .document
|
|
* or .body(). */
|
|
|
|
var events = require('events');
|
|
var HTML5 = require('./html5/index');
|
|
|
|
// Namespace object that holds the tree builder wrapper defined in this file.
// NOTE(review): assigned without var/let/const, so in non-strict code this
// creates an implicit global (and throws in strict mode) -- confirm whether
// other files rely on the global before declaring it locally.
FauxHTML5 = {};
|
|
|
|
|
|
/**
 * Front-end for the 'html5' tree builder.
 *
 * Creates an HTML5.Parser, hands this object to the parser's parse() method,
 * and then feeds an initial 'body' start tag through processToken so that a
 * document is started implicitly.
 *
 * The instance is an EventEmitter; processToken emits 'token' and 'end'
 * events on it.
 *
 * @constructor
 */
FauxHTML5.TreeBuilder = function ( ) {
	// Run the EventEmitter constructor on this instance so that every
	// TreeBuilder owns its listener table. Without this, Node versions whose
	// EventEmitter constructor allocates the table would have all instances
	// share the one created on the prototype, and tokens emitted by one
	// builder would reach the listeners registered for every other builder.
	events.EventEmitter.call( this );

	// The parser we are going to emit our tokens to
	this.parser = new HTML5.Parser();

	// Sets up the parser, passing this emitter as its argument
	this.parser.parse( this );

	// implicitly start a new document
	this.processToken( { type: 'TAG', name: 'body' } );
};

// Inherit the EventEmitter methods without instantiating an emitter, so that
// no per-instance emitter state ends up on the shared prototype.
FauxHTML5.TreeBuilder.prototype = Object.create( events.EventEmitter.prototype );
|
|
|
|
/**
 * Register this tree builder as a consumer of a token emitter.
 *
 * 'chunk' events are routed to onChunk and 'end' events to onEnd, both bound
 * to this instance.
 *
 * @param {Object} emitter Object exposing addListener( eventName, handler ).
 */
FauxHTML5.TreeBuilder.prototype.subscribeToTokenEmitter = function ( emitter ) {
	var chunkHandler = this.onChunk.bind( this );
	emitter.addListener( 'chunk', chunkHandler );

	var endHandler = this.onEnd.bind( this );
	emitter.addListener( 'end', endHandler );
};
|
|
|
|
/**
 * Handle a chunk of tokens by passing each one, in order, to processToken.
 *
 * @param {Array} tokens Indexable list of tokens; its length is read once
 *   before iteration starts.
 */
FauxHTML5.TreeBuilder.prototype.onChunk = function ( tokens ) {
	var count = tokens.length;
	var idx = 0;
	while ( idx < count ) {
		this.processToken( tokens[idx] );
		idx++;
	}
};
|
|
|
|
/**
 * Handle the end of the token stream.
 *
 * Publishes the parser's document as this.document, points document.body at
 * the first 'body' element, then resets the parser and starts a fresh
 * implicit document so the builder can be used again.
 */
FauxHTML5.TreeBuilder.prototype.onEnd = function ( ) {
	// FIXME HACK: For some reason the end token is not processed sometimes,
	// which normally fixes the body reference up. Do the fix-up here
	// unconditionally.
	var builtDocument = this.parser.document;
	this.document = builtDocument;
	builtDocument.body = builtDocument.getElementsByTagName('body')[0];

	// XXX: more clean up to allow reuse.
	// The finished document was captured above, before the parser is reset.
	this.parser.setup();
	this.processToken( { type: 'TAG', name: 'body' } );
};
|
|
|
|
|
|
/**
 * Adapt the token format to the internal HTML tree builder format and call
 * the actual HTML tree builder by emitting the converted token.
 *
 * Handled token types and what is emitted for each:
 *  - TEXT:           'token' { type: 'Characters', data: token.value }
 *  - TAG:            'token' { type: 'StartTag', name, data: attributes }
 *  - ENDTAG:         'token' { type: 'EndTag', name, data: attributes }
 *  - SELFCLOSINGTAG: a StartTag token followed by an EndTag token
 *  - COMMENT:        'token' { type: 'Comment', data: token.value }
 *  - END:            'end', then 'token' { type: 'EOF' }; afterwards
 *                    this.document is set from the parser
 *  - NEWLINE:        ignored
 * Any other type is logged to the console and otherwise ignored.
 *
 * @param {Object} token Token with a `type` property, plus `name`, `attribs`
 *   or `value` depending on the type.
 */
FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
	// Convert a list of [name, value] pairs into the {nodeName, nodeValue}
	// objects placed in the `data` field of tag tokens. Anything that is not
	// an array (e.g. a missing attribs property) yields an empty list.
	// Array.isArray is used rather than jQuery's $.isArray so that this
	// module does not depend on a global `$` being defined.
	var convertAttribs = function (maybeAttribs) {
		if ( ! Array.isArray( maybeAttribs ) ) {
			return [];
		}
		var atts = [];
		for (var i = 0, length = maybeAttribs.length; i < length; i++) {
			var pair = maybeAttribs[i];
			atts.push({nodeName: pair[0], nodeValue: pair[1]});
		}
		return atts;
	};

	switch (token.type) {
		case "TEXT":
			this.emit('token', {type: 'Characters', data: token.value});
			break;
		case "TAG":
			this.emit('token', {type: 'StartTag',
				name: token.name,
				data: convertAttribs(token.attribs)});
			break;
		case "ENDTAG":
			this.emit('token', {type: 'EndTag',
				name: token.name,
				data: convertAttribs(token.attribs)});
			break;
		case "SELFCLOSINGTAG":
			// Emitted as an explicit start/end pair; each token gets its own
			// freshly converted attribute list.
			this.emit('token', {type: 'StartTag',
				name: token.name,
				data: convertAttribs(token.attribs)});
			this.emit('token', {type: 'EndTag',
				name: token.name,
				data: convertAttribs(token.attribs)});
			break;
		case "COMMENT":
			this.emit('token', {type: 'Comment',
				data: token.value});
			break;
		case "END":
			// NOTE(review): 'end' is emitted before the EOF token and before
			// this.document is assigned -- confirm listeners expect this order.
			this.emit('end');
			this.emit('token', { type: 'EOF' } );
			this.document = this.parser.document;
			if ( ! this.document.body ) {
				// HACK: This should not be needed really.
				this.document.body = this.parser.document.getElementsByTagName('body')[0];
			}
			break;
		case "NEWLINE":
			// Newline tokens are currently dropped.
			//this.emit('end');
			//this.emit('token', {type: 'Characters', data: "\n"});
			break;
		default:
			console.log("Unhandled token: " + JSON.stringify(token));
			break;
	}
};
|
|
|
|
|
|
|
|
// Export the FauxHTML5 namespace when a CommonJS-style `module` object is
// present; otherwise do nothing.
if ( typeof module === 'object' ) {
	module.exports.FauxHTML5 = FauxHTML5;
}
|