mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-28 00:00:49 +00:00
Fix quote handling and tweak the whitelist a bit. 'any' token registrations
are now merged with specific registrations by rank. It is not yet clear whether that is a good idea overall; the use cases need to be checked when implementing template expansion and other functionality. 183 parser tests are now passing.
This commit is contained in:
parent
6cd95fea37
commit
2e35171fd1
Notes:
Gabriel Wicke
2012-02-27 16:40:01 +00:00
|
@ -5,20 +5,25 @@
|
|||
*/
|
||||
|
||||
/**
 * Set up a QuoteTransformer: store the two rank values this transformer uses
 * and initialize its buffering state through reset().
 *
 * @constructor
 */
function QuoteTransformer ( ) {
|
||||
// The bold and italic lists themselves are created in reset(); their
|
||||
// contents are processed in onNewLine.
|
||||
// Rank that onNewLine stamps onto the newline token before buffering it.
this.quoteAndNewlineRank = 2.1;
|
||||
// Rank passed to dispatcher.addTransform when onAny is registered.
this.anyRank = 2.101; // Just after regular quote and newline
|
||||
this.reset();
|
||||
}
|
||||
|
||||
/**
 * Clear all buffered state: the chunk being collected, the list of finished
 * chunks, the italic/bold reference lists and the isActive flag. Called from
 * the constructor and again by onNewLine after a line's tokens are returned.
 */
QuoteTransformer.prototype.reset = function ( ) {
|
||||
// NOTE(review): italics/bolds are also assigned further down; this view
// appears to interleave two revisions of the function. The duplicate is
// harmless (same value assigned twice) but confirm which pair is intended.
this.italics = [];
|
||||
this.bolds = [];
|
||||
// A chunk starts with a token context around a quote token and is
|
||||
// (optionally) followed by non-quote tokens. The quote token and its
|
||||
// context is later replaced with the actual tag token for italic or bold.
|
||||
this.currentChunk = [];
|
||||
// List of chunks, each starting with a (potentially) bold or italic token
|
||||
// and followed by plain tokens.
|
||||
this.chunks = [];
|
||||
// References to chunks in which the first token context / quote token
|
||||
// should be converted to italic or bold tokens.
|
||||
this.italics = [];
|
||||
this.bolds = [];
|
||||

||||
// Checked by onQuote and onNewLine to decide whether buffering is under way.
this.isActive = false;
|
||||
};
|
||||
|
||||
|
||||
|
@ -37,19 +42,15 @@ QuoteTransformer.prototype.register = function ( dispatcher ) {
|
|||
|
||||
// Make a copy of the token context
|
||||
/**
 * Finish the chunk currently being collected and begin a new, empty one.
 * The finished chunk is tagged with its index in this.chunks (as `pos`) and
 * appended to that list.
 */
QuoteTransformer.prototype._startNewChunk = function ( ) {
|
||||
// Index the chunk will occupy once pushed.
this.currentChunk.pos = this.chunks.length;
|
||||
this.chunks.push( this.currentChunk );
|
||||
this.currentChunk = [];
|
||||
// NOTE(review): this gives the fresh chunk the index of the chunk that was
// just pushed; the assignment at the top overwrites it on the next call.
// This view may interleave two revisions -- confirm which assignment is
// the intended one.
this.currentChunk.pos = this.chunks.length - 1;
|
||||
};
|
||||
|
||||
// Handle QUOTE tags. These are collected in italic/bold lists depending on
|
||||
// the length of quote string. Actual analysis and conversion to the
|
||||
// appropriate tag tokens is deferred until the next NEWLINE token triggers
|
||||
// onNewLine.
|
||||
//
|
||||
// XXX: Cannot use async stuff here, need to buffer things locally instead!
|
||||
// FIXME: Convert to internal buffering! -> return all tokens with rank set to
|
||||
// own rank to avoid reprocessing
|
||||
QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) {
|
||||
var qlen = token.value.length,
|
||||
tokens = [], // output tokens
|
||||
|
@ -66,9 +67,10 @@ QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) {
|
|||
};
|
||||
|
||||
|
||||
if ( this.chunks.length === 0 ) {
|
||||
if ( ! this.isActive ) {
|
||||
// register for any token if not yet active
|
||||
this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'tag', 'mw-quote' );
|
||||
this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'any' );
|
||||
this.isActive = true;
|
||||
}
|
||||
|
||||
this._startNewChunk();
|
||||
|
@ -114,7 +116,7 @@ QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) {
|
|||
break;
|
||||
}
|
||||
|
||||
return { token: null };
|
||||
return {};
|
||||
};
|
||||
|
||||
QuoteTransformer.prototype.onAny = function ( token, cb, frame, prevToken ) {
|
||||
|
@ -128,28 +130,28 @@ QuoteTransformer.prototype.onAny = function ( token, cb, frame, prevToken ) {
|
|||
QuoteTransformer.prototype.onNewLine = function ( token, cb, frame, prevToken ) {
|
||||
var res;
|
||||
|
||||
if( ! this.chunks.length ) {
|
||||
if( ! this.isActive ) {
|
||||
// Nothing to do, quick abort.
|
||||
return { token: token };
|
||||
}
|
||||
|
||||
|
||||
token.rank = this.quoteAndNewlineRank;
|
||||
this.currentChunk.push( token );
|
||||
this._startNewChunk();
|
||||
|
||||
//console.log("onNewLine: " + this.italics + this.bolds);
|
||||
//console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) );
|
||||
|
||||
//console.log("onNewLine: " + this.italics.length + 'i/b' + this.bolds.length);
|
||||
// balance out tokens, convert placeholders into tags
|
||||
if (this.italics.length % 2 && this.bolds.length % 2) {
|
||||
var firstsingleletterword = -1,
|
||||
firstmultiletterword = -1,
|
||||
firstspace = -1;
|
||||
for (var j = 0; j < this.bolds.length; j++) {
|
||||
var ctx = this.bolds[j];
|
||||
var ctx = this.bolds[j][0];
|
||||
//console.log("balancing!" + JSON.stringify(ctx.prevToken, null, 2));
|
||||
if (ctx.prevToken) {
|
||||
if (ctx.prevToken.type === 'TEXT') {
|
||||
var lastchar = ctx.prevToken.value[ctx.prevToken.value.length - 1],
|
||||
var lastchar = prevToken.value[ctx.prevToken.value.length - 1],
|
||||
secondtolastchar = ctx.prevToken.value[ctx.prevToken.value.length - 2];
|
||||
if (lastchar === ' ' && firstspace === -1) {
|
||||
firstspace = j;
|
||||
|
@ -189,12 +191,15 @@ QuoteTransformer.prototype.onNewLine = function ( token, cb, frame, prevToken )
|
|||
this.quotesToTags( this.italics, 'i' );
|
||||
this.quotesToTags( this.bolds, 'b' );
|
||||
|
||||
this.currentChunk.push( token );
|
||||
this._startNewChunk();
|
||||
|
||||
//console.log('chunks: ' + JSON.stringify( this.chunks, null, 2 ) );
|
||||
|
||||
// return all collected tokens including the newline
|
||||
res = { tokens: [].concat.apply([], this.chunks) };
|
||||
|
||||
// prepare for next session
|
||||
// prepare for next line
|
||||
this.reset();
|
||||
|
||||
// remove 'any' registration
|
||||
|
@ -246,7 +251,7 @@ QuoteTransformer.prototype.quotesToTags = function ( chunks, name ) {
|
|||
toggle = !toggle;
|
||||
}
|
||||
if (!toggle) {
|
||||
// Add end tag, but don't count it towards completion.
|
||||
// Add end tag
|
||||
this.currentChunk.push( {type: 'ENDTAG', name: name} );
|
||||
}
|
||||
};
|
||||
|
|
|
@ -127,7 +127,7 @@ TokenTransformDispatcher.prototype.addTransform = function ( transformation, ran
|
|||
}
|
||||
transArr.push(transformer);
|
||||
// sort ascending by rank
|
||||
transArr.sort( function ( t1, t2 ) { return t1.rank - t2.rank; } );
|
||||
transArr.sort( this._cmpTransformations );
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -185,6 +185,12 @@ TokenTransformDispatcher.prototype._resetTokenRank = function ( res, transformer
|
|||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Comparison for sorting transformations by ascending rank.
*
* Does not reference `this`, so it can be handed to Array.prototype.sort
* directly (as addTransform does).
*
* @param {Object} a First transformation; its `rank` property is read.
* @param {Object} b Second transformation; its `rank` property is read.
* @returns {number} Negative when a ranks before b, positive when a ranks
* after b, zero when both ranks are equal.
|
||||
*/
|
||||
TokenTransformDispatcher.prototype._cmpTransformations = function ( a, b ) {
|
||||
return a.rank - b.rank;
|
||||
};
|
||||
|
||||
/* Call all transformers on a tag.
|
||||
*
|
||||
|
@ -206,8 +212,10 @@ TokenTransformDispatcher.prototype._transformTagToken = function ( token, cb, ph
|
|||
tName = token.name.toLowerCase(),
|
||||
tagts = this.transformers[phaseEndRank].tag[tName];
|
||||
|
||||
if ( tagts ) {
|
||||
if ( tagts && tagts.length ) {
|
||||
// could cache this per tag type to avoid re-sorting each time
|
||||
ts = ts.concat(tagts);
|
||||
ts.sort( this._cmpTransformations );
|
||||
}
|
||||
//console.log(JSON.stringify(ts, null, 2));
|
||||
if ( ts ) {
|
||||
|
@ -252,7 +260,11 @@ TokenTransformDispatcher.prototype._transformTagToken = function ( token, cb, ph
|
|||
*/
|
||||
TokenTransformDispatcher.prototype._transformToken = function ( token, cb, phaseEndRank, frame, ts ) {
|
||||
// prepend 'any' transformers
|
||||
var anyTrans = this.transformers[phaseEndRank].any;
|
||||
if ( anyTrans.length ) {
|
||||
ts = this.transformers[phaseEndRank].any.concat(ts);
|
||||
ts.sort( this._cmpTransformations );
|
||||
}
|
||||
var transformer,
|
||||
res = { token: token },
|
||||
aborted = false;
|
||||
|
|
|
@ -10,14 +10,15 @@ testWhiteList["Italics and bold"] = "<ul><li> plain</li><li> plain<i>italic</i>p
|
|||
|
||||
testWhiteList["Bug 2702: Mismatched <i>, <b> and <a> tags are invalid"] = "<p><i><a href=\"http://example.com\">text</a></i><a href=\"http://example.com\" data-sourcePos=\"30:61\"><b>text</b></a><i data-sourcePos=\"62:106\">Something <a href=\"http://example.com\">in italic</a></i><i data-sourcePos=\"107:164\">Something <a href=\"http://example.com\">mixed</a></i><a href=\"http://example.com\"><b>, even bold</b></a><i data-sourcePos=\"165:204\"><b data-sourcePos=\"165:204\">Now <a href=\"http://example.com\">both</a></b></i></p>";
|
||||
|
||||
testWhiteList["Unclosed and unmatched quotes"] = "<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b></b>Bold text..</p><p>..spanning two paragraphs (should not work).<b></b></p><p><b></b>Bold tag left open</p><p><i></i>Italic tag left open</p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
|
||||
testWhiteList["Unclosed and unmatched quotes"] = "<p data-sourcePos=\"0:66\"><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i></p><p><i><b>Bold italic text </b></i><b>with italic deactivated<i> in between.</i></b></p><p><b>Bold text..</b></p><p>..spanning two paragraphs (should not work).<b></b></p><p><b>Bold tag left open</b></p><p><i>Italic tag left open</i></p><p>Normal text.<!-- Unmatching number of opening, closing tags: -->\n</p><p><b>This year'</b>s election <i>should</i> beat <b>last year'</b>s.</p><p><i>Tom<b>s car is bigger than </b></i><b>Susan</b>s.</p>";
|
||||
|
||||
testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i></i> and stuff</a>!</p>";
|
||||
// The expected result for this test is really broken html.
|
||||
testWhiteList["Link containing double-single-quotes '' in text embedded in italics (bug 4598 sanity check)"] = "<p data-sourcePos=\"0:45\"><i>Some <a data-type=\"internal\" href=\"Link\">pretty </a></i><a data-type=\"internal\" href=\"Link\">italics<i> and stuff</i></a><i>!</i></p>";
|
||||
|
||||
testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>";
|
||||
|
||||
// This is a rare edge case, and the new behavior is arguably more consistent
|
||||
testWhiteList["5 quotes, code coverage +1 line"] = "<p><i>'</i></p>";
|
||||
testWhiteList["5 quotes, code coverage +1 line"] = "<p>'<i></i></p>";
|
||||
|
||||
|
||||
// empty table tags / with only a caption are legal in HTML5.
|
||||
|
|
Loading…
Reference in a new issue