From 9ebce5839a20c2efa920bb8f11c51e57aedeca77 Mon Sep 17 00:00:00 2001 From: Gabriel Wicke Date: Mon, 12 Dec 2011 10:01:47 +0000 Subject: [PATCH] Further development of the TokenTransformer framework. --- modules/parser/mediawiki.TokenTransformer.js | 141 ++++++++++++++----- 1 file changed, 109 insertions(+), 32 deletions(-) diff --git a/modules/parser/mediawiki.TokenTransformer.js b/modules/parser/mediawiki.TokenTransformer.js index dd09bf2dd3..111b45bcf2 100644 --- a/modules/parser/mediawiki.TokenTransformer.js +++ b/modules/parser/mediawiki.TokenTransformer.js @@ -1,6 +1,21 @@ /* Generic token transformation dispatcher with support for asynchronous token * expansion. Individual transformations register for the token types they are - * interested in and are called on each matching token. */ + * interested in and are called on each matching token. + * + * A transformer might return null, a single token, or an array of tokens. + * - Null removes the token and stops further processing for this token. + * - A single token is further processed using the remaining transformations + * registered for this token, and finally placed in the output token list. + * - A list of tokens stops the processing for this token. Instead, processing + * restarts with the first returned token. + * + * Additionally, transformers performing asynchronous actions on a token can + * create a new TokenAccumulator using .newAccumulator(). This creates a new + * accumulator for each asynchronous result, with the asynchronously processed + * token last in its internal accumulator. This setup avoids the need to apply + * operational-transform-like index transformations when parallel expansions + * insert tokens in front of other ongoing expansion tasks. + * */ function TokenTransformer( callback ) { this.cb = callback; // Called with transformed token list when done @@ -60,62 +75,121 @@ TokenTransformer.prototype.removeListener = function ( listener, type, name ) { } }; -TokenTransformer.prototype._transformTagToken = function ( token, lastToken ) { +/* Constructor for information context relevant to token transformers + * + * @param token The token to precess + * @param accum {TokenAccumulator} The active TokenAccumulator. + * @param processor {TokenTransformer} The TokenTransformer object. + * @param lastToken Last returned token or {undefined}. + * @returns {TokenContext}. + */ +function TokenContext ( token, accum, transformer, lastToken ) { + this.token = token; + this.accum = accum; + this.transformer = transformer; + this.lastToken = lastToken; + return this; +} + +/* Call all transformers on a tag. + * + * @param {TokenContext} The current token and its context. + * @returns {TokenContext} Context with updated token and/or accum. + */ +TokenTransformer.prototype._transformTagToken = function ( tokenCTX ) { var ts = this.transformers.tag[token.name]; if ( ts ) { for (var i = 0, l = ts.length; i < l; i++ ) { - token = ts[i]( token, lastToken, this.accum, this ); - if ( token === null ) { + // Transform token with side effects + tokenCTX = ts[i]( tokenCTX ); + if ( tokenCTX.token === null || $.isArray(tokenCTX.token) ) { break; } + } } - return token; + return tokenCTX; }; -TokenTransformer.prototype._transformToken = function ( ts, token, lastToken ) { +/* Call all transformers on other token types. + * + * @param tokenCTX {TokenContext} The current token and its context. + * @param ts List of token transformers for this token type. + * @returns {TokenContext} Context with updated token and/or accum. + */ +TokenTransformer.prototype._transformToken = function ( tokenCTX, ts ) { if ( ts ) { for (var i = 0, l = ts.length; i < l; i++ ) { - token = ts[i]( token, lastToken, this.accum, this ); - if ( token === null ) { + tokenCTX = ts[i]( tokenCTX ); + if ( tokenCTX.token === null || $.isArray(tokenCTX.token) ) { break; } } } - return token; + return tokenCTX; }; -TokenTransformer.prototype.transformTokens = function ( tokens ) { - var currentOutout = []; - var lastToken; +/* Transform and expand tokens. + * + * Normally called with undefined accum. Asynchronous expansions will call + * this with their known accum, which allows expanded tokens to be spliced in + * at the appropriate location in the token list, which is always at the tail + * end of the current accumulator. + * + * @param tokens {List of tokens} Tokens to process. + * @param accum {TokenAccumulator} object. Undefined for first call, set to + * accumulator with expanded token at tail for asynchronous + * expansions. + * @returns nothing: Calls back registered callback if there are no more + * outstanding asynchronous expansions. + * */ +TokenTransformer.prototype.transformTokens = function ( tokens, accum ) { + if ( accum === undefined ) { + accum = this.accum; + } else { + // Prepare to replace the last token in the current accumulator. + accum.pop(); + } + var tokenCTX = TokenContext(undefined, accum, this, undefined); for ( var i = 0, l = tokens.length; i < l; i++ ) { - var token = tokens[i]; + tokenCTX.lastToken = tokenCTX.token; + tokenCTX.token = tokens[i]; var ts; - switch(token.type) { + switch(tokenCTX.token.type) { case 'TAG': case 'ENDTAG': case 'SELFCLOSINGTAG': - lastToken = this._transformTagToken( token, lastToken ); + tokenCTX = this._transformTagToken( tokenCTX ); break; case 'TEXT': - lastToken = this._transformToken(this.transformers.text, token, lastToken); + tokenCTX = this._transformToken( tokenCTX, this.transformers.text ); break; case 'COMMENT': - lastToken = this._transformToken(this.transformers.comment, token, lastToken); + tokenCTX = this._transformToken( tokenCTX, this.transformers.comment); break; case 'NEWLINE': - lastToken = this._transformToken(this.transformers.newline, token, lastToken); + tokenCTX = this._transformToken( tokenCTX, this.transformers.newline ); break; case 'END': - lastToken = this._transformToken(this.transformers.end, token, lastToken); + tokenCTX = this._transformToken( tokenCTX, this.transformers.end ); break; default: - lastToken = this._transformToken(this.transformers.martian, token, lastToken); + tokenCTX = this._transformToken( tokenCTX, this.transformers.martian ); break; } - if(lastToken) { - this.accum.push(lastToken); + if( $.isArray(tokenCTX.token) ) { + // Splice in the returned tokens (while replacing the original + // token), and process them next. + [].splice.apply(tokens, [i, 1].concat(tokenCTX.token)); + l += res.token.length - 1; + i--; // continue at first inserted token + } else if (tokenCTX.token) { + // push to accumulator (not necessarily the last one) + accum.push(tokenCTX.token); } + // Update current accum, in case a new one was spliced in by a + // transformation starting asynch work. + accum = tokenCTX.accum; } this.finish(); }; @@ -123,23 +197,22 @@ TokenTransformer.prototype.transformTokens = function ( tokens ) { TokenTransformer.prototype.finish = function ( ) { this.outstanding--; if ( this.outstanding === 0 ) { - // Join back the token accumulators into a single token list + // Join the token accumulators back into a single token list var a = this.firstaccum; var accums = [a.accum]; while ( a.next !== undefined ) { a = a.next; - accums.push(a.accum); + accums.concat(a.accum); } - // Call our callback with the token list - this.cb(accums.join()); + // Call our callback with the flattened token list + this.cb(accums); } }; - - -// Start a new accumulator with the given tokens. -TokenTransformer.prototype.newAccumulator = function ( tokens ) { - this.accum = this.accum.spliceAccumulator( tokens ); +/* Start a new accumulator for asynchronous work. */ +TokenTransformer.prototype.newAccumulator = function ( ) { + this.outstanding++; + return this.accum.insertAccumulator( ); }; // Token accumulators in a linked list. Using a linked list simplifies async @@ -156,7 +229,11 @@ TokenAccumulator.prototype.push = function ( token ) { this.accum.push(token); }; -TokenAccumulator.prototype.spliceAccumulator = function ( tokens ) { +TokenAccumulator.prototype.pop = function ( ) { + return this.accum.pop(); +}; + +TokenAccumulator.prototype.insertAccumulator = function ( ) { this.next = new TokenAccumulator(this.next, tokens); return this.next; };