mediawiki-extensions-Visual.../modules/parser/ext.core.QuoteTransformer.js
Gabriel Wicke a8fa9433c4 Convert quote handling (italic/bold) to a core extension operating on the
token stream. This is the first token transformation exercising the
TokenTransformer class as its dispatcher. Template expansions, wiki link
formatting, tag sanitation and extensions should be able to use the same
dispatcher by registering for specific token types.

The parser performance is very slightly improved as the token stream is only
traversed once.
2011-12-12 20:53:14 +00:00

212 lines
5.8 KiB
JavaScript

/*
* Italic/Bold handling.
*
* - list of tokens
* - NEWLINE
* - ticks (2+) -> list with link in line token list?
* - process on newline
* - need access to text nodes before for conversion back to text
*/
/**
 * Token transformer for wikitext quote runs (italic / bold ticks).
 *
 * A fresh instance has no pending quote contexts. The two lists are filled
 * by onQuote and emptied again by onNewLine.
 *
 * @constructor
 */
function QuoteTransformer() {
	// Contexts of quote tokens currently treated as italic toggles.
	this.italics = [];

	// Contexts of quote tokens currently treated as bold toggles.
	this.bolds = [];

	// Counter that no method in this file reads or updates.
	this.inserted = 0;
}
/**
 * Subscribe this transformer to the token types it handles.
 *
 * Two listeners are appended, in this order:
 *  1. 'newline' tokens           -> onNewLine
 *  2. 'tag' tokens named 'QUOTE' -> onQuote
 *
 * @param {Object} tokenTransformer Dispatcher exposing appendListener().
 */
QuoteTransformer.prototype.register = function ( tokenTransformer ) {
	var quoteTransformer = this;

	// The wrappers look the handler up at call time, so the handler always
	// runs with this instance as its receiver.
	function handleNewLine( ctx ) {
		return quoteTransformer.onNewLine( ctx );
	}

	function handleQuote( ctx ) {
		return quoteTransformer.onQuote( ctx );
	}

	tokenTransformer.appendListener( handleNewLine, 'newline' );
	tokenTransformer.appendListener( handleQuote, 'tag', 'QUOTE' );
};
/**
 * Take a shallow snapshot of the parts of a token context this transformer
 * needs later. The field values themselves are shared, not cloned.
 *
 * @param {Object} tokenCTX Context handed in by the dispatcher.
 * @return {Object} New object with accum, token, lastToken and pos.
 */
QuoteTransformer.prototype.ctx = function ( tokenCTX ) {
	var snapshot = {};

	snapshot.accum = tokenCTX.accum;
	snapshot.token = tokenCTX.token;
	snapshot.lastToken = tokenCTX.lastToken;
	snapshot.pos = tokenCTX.pos;

	return snapshot;
};
/**
 * Handle a QUOTE token (a run of ticks).
 *
 * The run is classified by its length:
 *  - 2 ticks: italic
 *  - 3 ticks: bold
 *  - 4 ticks: one literal tick followed by bold
 *  - 5 ticks: italic and bold
 *  - anything else: the leading (length - 5) characters become literal
 *    text, the rest is italic and bold
 *
 * Literal text is appended to the preceding TEXT token when there is one;
 * otherwise it replaces the quote token as a new TEXT token. Without literal
 * text the token is replaced by null. A snapshot of the context is queued on
 * this.italics and/or this.bolds, and a new accumulator is requested from
 * the dispatcher.
 *
 * @param {Object} tokenCTX Context with token, lastToken, accum, pos and
 *   transformer.
 * @return {Object} The same tokenCTX, with token and accum replaced.
 */
QuoteTransformer.prototype.onQuote = function ( tokenCTX ) {
	var quoteToken = tokenCTX.token,
		tickCount = quoteToken.value.length,
		prevToken = tokenCTX.lastToken,
		quoteCtx = this.ctx( tokenCTX ),
		nextAccum = tokenCTX.accum,
		isSingleStyle = tickCount === 2 || tickCount === 3 || tickCount === 4,
		literal = null,
		replacement = null,
		boldCtx;

	// Work out which part of the run, if any, stays literal text.
	if ( tickCount === 4 ) {
		literal = "'";
	} else if ( !isSingleStyle && tickCount !== 5 ) {
		// Only the last five ticks carry markup.
		literal = quoteToken.value.substr( 0, tickCount - 5 );
	}

	if ( literal !== null ) {
		if ( prevToken && prevToken.type === 'TEXT' ) {
			prevToken.value += literal;
		} else {
			replacement = { type: 'TEXT', value: literal };
		}
	}

	if ( isSingleStyle ) {
		nextAccum = tokenCTX.transformer.newAccumulator( nextAccum );
		if ( tickCount === 2 ) {
			this.italics.push( quoteCtx );
		} else {
			this.bolds.push( quoteCtx );
		}
	} else {
		// Italic plus bold. Their relative order does not matter here; it
		// is fixed up by the HTML tree builder.
		// NOTE(review): the second argument presumably reserves two
		// placeholder slots - confirm against the dispatcher.
		nextAccum = tokenCTX.transformer.newAccumulator( nextAccum, 2 );
		this.italics.push( quoteCtx );
		// The bold half gets its own token object sharing the attributes.
		boldCtx = this.ctx( tokenCTX );
		boldCtx.token = { attribs: quoteCtx.token.attribs };
		this.bolds.push( boldCtx );
	}

	tokenCTX.token = replacement;
	tokenCTX.accum = nextAccum;
	return tokenCTX;
};
/**
 * Handle a NEWLINE token: balance the quotes collected on the finished
 * line and turn their placeholders into i/b tags.
 *
 * When both the italic and the bold count are odd, one bold is converted
 * into an apostrophe plus an italic. The bold to convert is chosen by what
 * precedes it, in this order of preference:
 *  1. the first bold preceded by a single-letter word,
 *  2. the first bold preceded by a multi-letter word (a preceding NEWLINE
 *     or TAG token is counted as one as well),
 *  3. the first bold preceded by a space.
 *
 * @param {Object} tokenCTX Context of the NEWLINE token; its transformer
 *   is handed on to quotesToTags.
 * @return {Object} tokenCTX, unchanged.
 */
QuoteTransformer.prototype.onNewLine = function ( tokenCTX ) {
	// Arrays are always truthy, so emptiness must be tested via length.
	if ( this.bolds.length === 0 && this.italics.length === 0 ) {
		// Nothing to do, quick abort.
		return tokenCTX;
	}

	if ( this.italics.length % 2 && this.bolds.length % 2 ) {
		var firstsingleletterword = -1,
			firstmultiletterword = -1,
			firstspace = -1;

		for ( var j = 0; j < this.bolds.length; j++ ) {
			var lastToken = this.bolds[j].lastToken;
			if ( !lastToken ) {
				continue;
			}

			if ( lastToken.type === 'TEXT' ) {
				var text = lastToken.value,
					lastchar = text[text.length - 1],
					secondtolastchar = text[text.length - 2];

				if ( lastchar === ' ' ) {
					if ( firstspace === -1 ) {
						firstspace = j;
					}
				} else if ( secondtolastchar === ' ' ) {
					if ( firstsingleletterword === -1 ) {
						firstsingleletterword = j;
					}
				} else if ( firstmultiletterword === -1 ) {
					firstmultiletterword = j;
				}
			} else if ( ( lastToken.type === 'NEWLINE' ||
					lastToken.type === 'TAG' ) &&
					firstmultiletterword === -1 ) {
				// Record only the first such bold, like the other
				// "first..." candidates.
				firstmultiletterword = j;
			}
		}

		// now see if we can convert a bold to an italic and
		// an apostrophe
		if ( firstsingleletterword > -1 ) {
			this.convertBold( firstsingleletterword );
		} else if ( firstmultiletterword > -1 ) {
			this.convertBold( firstmultiletterword );
		} else if ( firstspace > -1 ) {
			this.convertBold( firstspace );
		}
	}

	this.quotesToTags( this.italics, 'i', tokenCTX.transformer );
	this.quotesToTags( this.bolds, 'b', tokenCTX.transformer );

	// Start the next line with empty lists.
	this.bolds = [];
	this.italics = [];

	// Pass through the NEWLINE token unchanged
	return tokenCTX;
};
/**
 * Turn the bold at index i into an apostrophe followed by an italic.
 *
 * The apostrophe is appended to the preceding TEXT token when there is one.
 * Otherwise the context's token becomes a two-element array
 * [apostrophe TEXT token, original token], which quotesToTags unpacks.
 * The context then moves from this.bolds to this.italics, and the italics
 * are re-sorted by position.
 *
 * @param {number} i Index into this.bolds.
 */
QuoteTransformer.prototype.convertBold = function ( i ) {
	var demoted = this.bolds[i],
		precedingText = demoted.lastToken && demoted.lastToken.type === 'TEXT';

	if ( precedingText ) {
		demoted.lastToken.value += "'";
	} else {
		// No text token to extend, so carry a new one along with the token.
		demoted.token = [ { type: 'TEXT', value: "'" }, demoted.token ];
	}

	this.bolds.splice( i, 1 );
	this.italics.push( demoted );
	this.italics.sort( function ( left, right ) {
		return left.pos - right.pos;
	} );
};
/**
 * Convert the queued quote contexts into alternating start / end tags.
 *
 * Each context's token is rewritten in place (type 'TAG' or 'ENDTAG', the
 * given name, value removed) and passed to transformer.transformTokens
 * together with the accumulator stored in that context. A token that
 * convertBold turned into an array [text token, real token] is unpacked
 * first: the text token is sent ahead of the real one. If the last tag is
 * left open, a closing tag is sent to the last context's accumulator.
 * Finally transformer.finish is called with the number of contexts.
 *
 * @param {Array} contexts Quote contexts, in document order.
 * @param {string} name Tag name to emit, e.g. 'i' or 'b'.
 * @param {Object} transformer Dispatcher exposing transformTokens( tokens,
 *   accum, delta ) and finish( n ).
 */
QuoteTransformer.prototype.quotesToTags = function ( contexts, name, transformer ) {
	var toggle = true,
		t;

	for ( var j = 0; j < contexts.length; j++ ) {
		t = contexts[j].token;
		// Array.isArray is used rather than $.isArray so that this code
		// does not need a jQuery global.
		if ( Array.isArray( t ) ) {
			// Slip in a text token from bold to italic rebalancing
			var realToken = t.pop();
			transformer.transformTokens( t, contexts[j].accum, 0 );
			t = realToken;
		}

		t.type = toggle ? 'TAG' : 'ENDTAG';
		t.name = name;
		delete t.value;
		toggle = !toggle;

		// Re-add and process the new token with the original accumulator
		transformer.transformTokens( [t], contexts[j].accum, 0 );
	}

	if ( !toggle ) {
		// add end tag, but don't count it towards the finish
		transformer.transformTokens( [ { type: 'ENDTAG', name: name } ],
			contexts[contexts.length - 1].accum, 0 );
	}

	// now allow the transformer to finish
	transformer.finish( contexts.length );
};
// Export the constructor when a CommonJS-style `module` object is in scope;
// where there is none, this block does nothing.
if ( typeof module === 'object' ) {
	module.exports.QuoteTransformer = QuoteTransformer;
}