mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-28 16:20:52 +00:00
d918fa18ac
* Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
143 lines
3.9 KiB
JavaScript
143 lines
3.9 KiB
JavaScript
/* Front-end/Wrapper for a particular tree builder, in this case the
 * parser/tree builder from the node 'html5' module. Feed it tokens using
 * processToken, and it will build you a DOM tree retrievable using .document
 * or .body(). */
|
|
|
|
var events = require('events'),
|
|
HTML5 = require('./html5/index');
|
|
|
|
// Namespace object that holds the TreeBuilder class; exported at the bottom
// of this file as module.exports.FauxHTML5.
// NOTE(review): this is assigned without `var` (the preceding `var` statement
// is already terminated), so in non-strict code it creates a global and in
// strict mode it would throw. Confirm whether other files rely on the global
// before declaring it locally.
FauxHTML5 = {};
|
|
|
|
|
|
/**
 * Wrapper around an HTML5.Parser instance. The builder is itself an event
 * emitter: processToken() emits 'token' events on it, and the builder is
 * handed to the parser via parser.parse( this ).
 *
 * @constructor
 * @param {Object} env Environment object, stored as this.env. onChunk() calls
 *   env.dp() on it for debug output.
 */
FauxHTML5.TreeBuilder = function ( env ) {
	// Initialise per-instance emitter state. Without this call the listener
	// table can end up being looked up on the shared prototype, which makes
	// every TreeBuilder instance share the same listeners.
	events.EventEmitter.call( this );

	// The parser we are going to emit our tokens to
	this.parser = new HTML5.Parser();

	// Sets up the parser
	this.parser.parse( this );

	// implicitly start a new document
	this.processToken( new TagTk( 'body' ) );

	this.env = env;
};

// Inherit from EventEmitter. Object.create links the prototype chain without
// running the EventEmitter constructor, so no emitter state is stored on the
// prototype that all instances share.
FauxHTML5.TreeBuilder.prototype = Object.create( events.EventEmitter.prototype );
FauxHTML5.TreeBuilder.prototype.constructor = FauxHTML5.TreeBuilder;
|
|
|
|
/**
 * Subscribe this tree builder to the 'chunk' and 'end' events of a token
 * emitter (normally the TokenTransformDispatcher). 'chunk' is routed to
 * onChunk and 'end' to onEnd, both bound to this instance.
 *
 * @param {Object} emitter Object providing addListener( eventName, handler ).
 */
FauxHTML5.TreeBuilder.prototype.addListenersOn = function ( emitter ) {
	var chunkHandler = this.onChunk.bind( this );
	emitter.addListener( 'chunk', chunkHandler );

	var endHandler = this.onEnd.bind( this );
	emitter.addListener( 'end', endHandler );
};
|
|
|
|
/**
 * 'chunk' event handler: dumps the chunk through env.dp() and then passes
 * each token, in order, to processToken().
 *
 * @param {Array} tokens The tokens of the received chunk.
 */
FauxHTML5.TreeBuilder.prototype.onChunk = function ( tokens ) {
	this.env.dp( 'chunk: ' + JSON.stringify( tokens, null, 2 ) );

	// The token count is read once, before any token is processed.
	var count = tokens.length,
		index = 0;
	while ( index < count ) {
		this.processToken( tokens[index] );
		index++;
	}
};
|
|
|
|
/**
 * 'end' event handler: emits the parser's document in a 'document' event,
 * then resets the parser and opens a new implicit body so the builder can be
 * used again.
 */
FauxHTML5.TreeBuilder.prototype.onEnd = function () {
	var doc = this.parser.document,
		bodies = doc.getElementsByTagName( 'body' );

	// FIXME HACK: For some reason the end token is not processed sometimes,
	// which normally fixes the body reference up. Set it by hand here.
	doc.body = bodies[0];

	this.emit( 'document', doc );

	// XXX: more clean up to allow reuse.
	this.parser.setup();
	this.processToken( new TagTk( 'body' ) );
};
|
|
|
|
/**
 * Convert a token attribute list into the attribute format passed to the
 * tree builder in the 'data' field of tag tokens.
 *
 * @param {Array} [maybeAttribs] List of objects with `k` (name) and `v`
 *   (value) properties. Anything that is not an array is treated as "no
 *   attributes".
 * @returns {Array} New array of { nodeName, nodeValue } objects, one per input
 *   entry and in the same order; empty when no array was passed.
 */
FauxHTML5.TreeBuilder.prototype._att = function ( maybeAttribs ) {
	var atts = [];
	// Array.isArray is the standard check; it avoids depending on a jQuery
	// `$` global being present in this module's environment.
	if ( Array.isArray( maybeAttribs ) ) {
		for ( var i = 0, length = maybeAttribs.length; i < length; i++ ) {
			var att = maybeAttribs[i];
			atts.push( { nodeName: att.k, nodeValue: att.v } );
		}
	}
	return atts;
};
|
|
|
|
/**
 * Adapt the token format to the internal HTML tree builder format and call
 * the actual HTML tree builder by emitting the converted token as a 'token'
 * event.
 *
 * The token is dispatched on its constructor:
 * - String: emitted as a 'Characters' token.
 * - NlTk: ignored.
 * - TagTk / EndTagTk: emitted as 'StartTag' / 'EndTag'.
 * - SelfclosingTagTk: emitted as 'StartTag', followed by an 'EndTag' unless
 *   the tag name is listed in HTML5.VOID_ELEMENTS.
 * - CommentTk: emitted as 'Comment'.
 * - EOFTk: emits 'end', then an 'EOF' token, and stores the parser's document
 *   on this.document.
 * Any other token is reported with console.warn and dropped.
 *
 * The token itself is never modified.
 *
 * @param {Object|string} token The token to process.
 */
FauxHTML5.TreeBuilder.prototype.processToken = function ( token ) {
	var attribs = token.attribs || [];
	if ( token.dataAttribs ) {
		// concat() builds a new list, so the token's own attribs stay
		// untouched; tokens are shared and must not be written to here.
		attribs = attribs.concat( [
			{
				// Mediawiki-specific round-trip / non-semantic information
				k: 'data-mw',
				v: JSON.stringify( token.dataAttribs )
			}
		] );
	}

	switch ( token.constructor ) {
		case String:
			this.emit( 'token', { type: 'Characters', data: token } );
			break;
		case NlTk:
			// Newline tokens are not passed on to the tree builder.
			break;
		case TagTk:
			this.emit( 'token', {
				type: 'StartTag',
				name: token.name,
				data: this._att( attribs )
			} );
			break;
		case SelfclosingTagTk:
			this.emit( 'token', {
				type: 'StartTag',
				name: token.name,
				data: this._att( attribs )
			} );
			if ( HTML5.VOID_ELEMENTS.indexOf( token.name.toLowerCase() ) < 0 ) {
				// VOID_ELEMENTS are automagically treated as self-closing by
				// the tree builder
				this.emit( 'token', {
					type: 'EndTag',
					name: token.name,
					data: this._att( attribs )
				} );
			}
			break;
		case EndTagTk:
			this.emit( 'token', {
				type: 'EndTag',
				name: token.name,
				data: this._att( attribs )
			} );
			break;
		case CommentTk:
			this.emit( 'token', {
				type: 'Comment',
				data: token.value
			} );
			break;
		case EOFTk:
			this.emit( 'end' );
			this.emit( 'token', { type: 'EOF' } );
			this.document = this.parser.document;
			if ( !this.document.body ) {
				// HACK: This should not be needed really.
				this.document.body = this.parser.document.getElementsByTagName( 'body' )[0];
			}
			// The 'document' event is not emitted from here.
			break;
		default:
			console.warn( "Unhandled token: " + JSON.stringify( token ) );
			break;
	}
};
|
|
|
|
|
|
|
|
// Export the FauxHTML5 namespace when a `module` object is available.
if ( typeof module === "object" ) {
	module.exports.FauxHTML5 = FauxHTML5;
}
|