From 2e35171fd1e65afd3916310d19b262c5245a0106 Mon Sep 17 00:00:00 2001 From: Gabriel Wicke Date: Wed, 4 Jan 2012 14:09:05 +0000 Subject: [PATCH] Fix quote handling and tweak the whitelist a bit. 'any' token registrations are now merged with specific registrations by rank. Not yet clear if that is a good idea overall, need to check use cases when implementing template expansion and other functionality. 183 parser tests now passing. --- modules/parser/ext.core.QuoteTransformer.js | 45 ++++++++++--------- .../mediawiki.TokenTransformDispatcher.js | 18 ++++++-- tests/parser/parserTests-whitelist.js | 7 +-- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/modules/parser/ext.core.QuoteTransformer.js b/modules/parser/ext.core.QuoteTransformer.js index c4d5e4158d..e7a93dd6f7 100644 --- a/modules/parser/ext.core.QuoteTransformer.js +++ b/modules/parser/ext.core.QuoteTransformer.js @@ -5,20 +5,25 @@ */ function QuoteTransformer ( ) { - // Bold and italic tokens are collected in these lists, and then processed - // in onNewLine. this.quoteAndNewlineRank = 2.1; this.anyRank = 2.101; // Just after regular quote and newline this.reset(); } QuoteTransformer.prototype.reset = function ( ) { - this.italics = []; - this.bolds = []; + // A chunk starts with a token context around a quote token and is + // (optionally) followed by non-quote tokens. The quote token and its + // context is later replaced with the actual tag token for italic or bold. this.currentChunk = []; // List of chunks, each starting with a (potentially) bold or italic token // and followed by plain tokens. this.chunks = []; + // References to chunks in which the first token context / quote token + // should be converted to italic or bold tokens. 
+ this.italics = []; + this.bolds = []; + + this.isActive = false; }; @@ -37,19 +42,15 @@ QuoteTransformer.prototype.register = function ( dispatcher ) { // Make a copy of the token context QuoteTransformer.prototype._startNewChunk = function ( ) { - this.currentChunk.pos = this.chunks.length; this.chunks.push( this.currentChunk ); this.currentChunk = []; + this.currentChunk.pos = this.chunks.length - 1; }; // Handle QUOTE tags. These are collected in italic/bold lists depending on // the length of quote string. Actual analysis and conversion to the // appropriate tag tokens is deferred until the next NEWLINE token triggers // onNewLine. -// -// XXX: Cannot use async stuff here, need to buffer things locally instead! -// FIXME: Convert to internal buffering! -> return all tokens with rank set to -// own rank to avoid reprocessing QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) { var qlen = token.value.length, tokens = [], // output tokens @@ -66,9 +67,10 @@ QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) { }; - if ( this.chunks.length === 0 ) { + if ( ! this.isActive ) { // register for any token if not yet active - this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'tag', 'mw-quote' ); + this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'any' ); + this.isActive = true; } this._startNewChunk(); @@ -114,7 +116,7 @@ QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) { break; } - return { token: null }; + return {}; }; QuoteTransformer.prototype.onAny = function ( token, cb, frame, prevToken ) { @@ -128,28 +130,28 @@ QuoteTransformer.prototype.onAny = function ( token, cb, frame, prevToken ) { QuoteTransformer.prototype.onNewLine = function ( token, cb, frame, prevToken ) { var res; - if( ! this.chunks.length ) { + if( ! this.isActive ) { // Nothing to do, quick abort. 
return { token: token }; } token.rank = this.quoteAndNewlineRank; - this.currentChunk.push( token ); - this._startNewChunk(); - //console.log("onNewLine: " + this.italics + this.bolds); + //console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) ); + + //console.log("onNewLine: " + this.italics.length + 'i/b' + this.bolds.length); // balance out tokens, convert placeholders into tags if (this.italics.length % 2 && this.bolds.length % 2) { var firstsingleletterword = -1, firstmultiletterword = -1, firstspace = -1; for (var j = 0; j < this.bolds.length; j++) { - var ctx = this.bolds[j]; + var ctx = this.bolds[j][0]; //console.log("balancing!" + JSON.stringify(ctx.prevToken, null, 2)); if (ctx.prevToken) { if (ctx.prevToken.type === 'TEXT') { - var lastchar = ctx.prevToken.value[ctx.prevToken.value.length - 1], + var lastchar = prevToken.value[ctx.prevToken.value.length - 1], secondtolastchar = ctx.prevToken.value[ctx.prevToken.value.length - 2]; if (lastchar === ' ' && firstspace === -1) { firstspace = j; @@ -189,12 +191,15 @@ QuoteTransformer.prototype.onNewLine = function ( token, cb, frame, prevToken ) this.quotesToTags( this.italics, 'i' ); this.quotesToTags( this.bolds, 'b' ); + this.currentChunk.push( token ); + this._startNewChunk(); + //console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) ); // return all collected tokens including the newline res = { tokens: [].concat.apply([], this.chunks) }; - // prepare for next session + // prepare for next line this.reset(); // remove 'any' registration @@ -246,7 +251,7 @@ QuoteTransformer.prototype.quotesToTags = function ( chunks, name ) { toggle = !toggle; } if (!toggle) { - // Add end tag, but don't count it towards completion. 
+ // Add end tag this.currentChunk.push( {type: 'ENDTAG', name: name} ); } }; diff --git a/modules/parser/mediawiki.TokenTransformDispatcher.js b/modules/parser/mediawiki.TokenTransformDispatcher.js index 0dfa8c9c87..d7ff612221 100644 --- a/modules/parser/mediawiki.TokenTransformDispatcher.js +++ b/modules/parser/mediawiki.TokenTransformDispatcher.js @@ -127,7 +127,7 @@ TokenTransformDispatcher.prototype.addTransform = function ( transformation, ran } transArr.push(transformer); // sort ascending by rank - transArr.sort( function ( t1, t2 ) { return t1.rank - t2.rank; } ); + transArr.sort( this._cmpTransformations ); }; /** @@ -185,6 +185,12 @@ TokenTransformDispatcher.prototype._resetTokenRank = function ( res, transformer } }; +/** + * Comparison for sorting transformations by ascending rank. + */ +TokenTransformDispatcher.prototype._cmpTransformations = function ( a, b ) { + return a.rank - b.rank; +}; /* Call all transformers on a tag. * @@ -206,8 +212,10 @@ TokenTransformDispatcher.prototype._transformTagToken = function ( token, cb, ph tName = token.name.toLowerCase(), tagts = this.transformers[phaseEndRank].tag[tName]; - if ( tagts ) { + if ( tagts && tagts.length ) { + // could cache this per tag type to avoid re-sorting each time ts = ts.concat(tagts); + ts.sort( this._cmpTransformations ); } //console.log(JSON.stringify(ts, null, 2)); if ( ts ) { @@ -252,7 +260,11 @@ TokenTransformDispatcher.prototype._transformTagToken = function ( token, cb, ph */ TokenTransformDispatcher.prototype._transformToken = function ( token, cb, phaseEndRank, frame, ts ) { // prepend 'any' transformers - ts = this.transformers[phaseEndRank].any.concat(ts); + var anyTrans = this.transformers[phaseEndRank].any; + if ( anyTrans.length ) { + ts = this.transformers[phaseEndRank].any.concat(ts); + ts.sort( this._cmpTransformations ); + } var transformer, res = { token: token }, aborted = false; diff --git a/tests/parser/parserTests-whitelist.js b/tests/parser/parserTests-whitelist.js 
index 49c48b8c29..51b9c8410c 100644 --- a/tests/parser/parserTests-whitelist.js +++ b/tests/parser/parserTests-whitelist.js @@ -10,14 +10,15 @@ testWhiteList["Italics and bold"] = "