2011-12-12 20:53:14 +00:00
|
|
|
/*
 * MediaWiki-compatible italic/bold handling as a token stream transformation.
 *
 * @author Gabriel Wicke <gwicke@wikimedia.org>
 */
|
|
|
|
|
|
|
|
// Constructor for the quote (italic / bold) token transformer.
//
// Creating an instance only initialises the per-line bookkeeping by calling
// reset(); nothing is hooked into a dispatcher until register() is called.
function QuoteTransformer() {
	this.reset();
}
|
|
|
|
|
2012-01-06 17:19:14 +00:00
|
|
|
// Transformation ranks (constants).

// Rank used for the quote, newline and end handlers installed by register().
QuoteTransformer.prototype.quoteAndNewlineRank = 2.1;

// Rank used for the temporary 'any' handler: just after the regular quote
// and newline handlers.
QuoteTransformer.prototype.anyRank = 2.101;
|
|
|
|
|
2012-01-03 18:44:31 +00:00
|
|
|
// (Re-)initialise all per-line state. Called from the constructor, at the end
// of onNewLine, and as the handler registered for 'end' in register().
QuoteTransformer.prototype.reset = function () {
	// The chunk currently being filled. A chunk starts with a token context
	// around a quote token and is (optionally) followed by non-quote tokens.
	// The quote token and its context are later replaced with the actual
	// italic or bold tag token.
	this.currentChunk = [];

	// All chunks archived so far, in stream order.
	this.chunks = [];

	// References to the chunks whose first entry (token context / quote
	// token) should be turned into an italic tag token ...
	this.italics = [];
	// ... and into a bold tag token, respectively.
	this.bolds = [];

	// True while the 'any' handler (onAny) is registered, i.e. between the
	// first quote of a line and the following newline.
	this.isActive = false;
};
|
|
|
|
|
2011-12-12 20:53:14 +00:00
|
|
|
|
2011-12-13 10:25:18 +00:00
|
|
|
// Register this transformer with the given token dispatcher.
//
// The dispatcher is remembered on the instance (onQuote and onNewLine use it
// to add and remove the temporary 'any' handler), and three handlers are
// installed at quoteAndNewlineRank, each bound to this instance:
//   - onNewLine for 'newline' tokens,
//   - onQuote   for 'tag' tokens named 'mw-quote',
//   - reset     for 'end', to clear internal state when we are done.
//
// NOTE(review): reset() only clears the buffers. If input can end without a
// trailing newline token, tokens buffered since the last quote would not be
// emitted and the 'any' handler would stay registered -- confirm against the
// dispatcher's 'end' handling.
QuoteTransformer.prototype.register = function ( dispatcher ) {
	var self = this,
		registrations = [
			{ handler: 'onNewLine', selector: [ 'newline' ] },
			{ handler: 'onQuote', selector: [ 'tag', 'mw-quote' ] },
			{ handler: 'reset', selector: [ 'end' ] }
		];

	this.dispatcher = dispatcher;

	registrations.forEach( function ( entry ) {
		// Same argument list as a direct call: ( boundHandler, rank, ...selector )
		var args = [ self[ entry.handler ].bind( self ), self.quoteAndNewlineRank ]
			.concat( entry.selector );
		dispatcher.addTransform.apply( dispatcher, args );
	} );
};
|
|
|
|
|
2011-12-13 11:18:15 +00:00
|
|
|
// Archive the chunk being filled and begin a fresh, empty one.
//
// The new chunk's `pos` is the index (in this.chunks) of the chunk that was
// just archived, i.e. one less than the index the new chunk itself will
// occupy once it is archived in turn.
QuoteTransformer.prototype._startNewChunk = function () {
	var fresh = [];
	// Array.prototype.push returns the new length of this.chunks.
	fresh.pos = this.chunks.push( this.currentChunk ) - 1;
	this.currentChunk = fresh;
};
|
|
|
|
|
2011-12-13 10:25:18 +00:00
|
|
|
// Handle QUOTE tags. Each quote starts a new chunk and is filed under the
// italic and/or bold candidate lists depending on the number of ticks in
// token.value:
//   2   -> italic
//   3   -> bold
//   4   -> one literal "'" followed by bold
//   5   -> italic followed by bold
//   else-> the leading ( length - 5 ) ticks as literal text, then italic
//          followed by bold (only the last 5 ticks are used)
// The actual analysis and the conversion to tag tokens are deferred until the
// next NEWLINE token triggers onNewLine.
//
// The first quote on a line also registers onAny for 'any' tokens so that the
// tokens following the quote are collected into the chunks as well.
//
// Always returns an empty result object.
QuoteTransformer.prototype.onQuote = function ( token, cb, frame, prevToken ) {
	var self = this,
		tickCount = token.value.length,
		// Context stored as the first entry of the quote's chunk.
		quoteCtx = {
			token: token,
			cb: cb,
			frame: frame,
			prevToken: prevToken
		},
		// Context for the second (bold) chunk of a 5+ tick quote; it receives
		// its own placeholder token below.
		extraCtx = {
			cb: cb,
			frame: frame,
			prevToken: prevToken
		};

	// Put ctx at the head of the current chunk and file that chunk in list.
	function mark( ctx, list ) {
		self.currentChunk.push( ctx );
		list.push( self.currentChunk );
	}

	// Emit text as a literal TEXT token in its own chunk.
	function literal( text ) {
		self.currentChunk.push( { type: 'TEXT', value: text } );
		self._startNewChunk();
	}

	if ( !this.isActive ) {
		// register for any token if not yet active
		this.dispatcher.addTransform( this.onAny.bind( this ), this.anyRank, 'any' );
		this.isActive = true;
	}

	this._startNewChunk();

	if ( tickCount === 2 ) {
		mark( quoteCtx, this.italics );
	} else if ( tickCount === 3 ) {
		mark( quoteCtx, this.bolds );
	} else if ( tickCount === 4 ) {
		literal( "'" );
		mark( quoteCtx, this.bolds );
	} else {
		if ( tickCount !== 5 ) {
			// longer than 5, only use the last 5 ticks
			literal( token.value.substr( 0, tickCount - 5 ) );
		}
		// The order of italic vs. bold does not matter. Those are processed
		// in a fixed order, and any nesting issues are fixed up by the HTML 5
		// tree builder. This does not always give the prettiest result, but
		// at least it is always correct and very convenient.
		mark( quoteCtx, this.italics );
		this._startNewChunk();
		extraCtx.token = { attribs: token.attribs };
		mark( extraCtx, this.bolds );
	}

	return {};
};
|
|
|
|
|
|
|
|
// Handler for 'any' tokens; registered by onQuote for the first quote of a
// line and removed again by onNewLine. Appends the token to the chunk that is
// currently being filled and returns an empty result object.
QuoteTransformer.prototype.onAny = function ( token, cb, frame, prevToken ) {
	var chunk = this.currentChunk;
	chunk.push( token );
	return {};
};
|
|
|
|
|
2011-12-13 10:25:18 +00:00
|
|
|
// Handle NEWLINE tokens, which trigger the actual quote analysis on the quote
// tokens collected so far on this line.
//
// If no quote was seen since the last newline (isActive is false) the token
// is passed through as { token: token }. Otherwise:
//   1. If both the italic and the bold candidate counts are odd, one bold is
//      converted into an apostrophe plus an italic (see convertBold). The
//      bold to convert is chosen by looking at the token that preceded each
//      bold quote (ctx.prevToken), preferring a bold that follows a
//      single-letter word, then one that follows a multi-letter word, then
//      one that follows a space.
//   2. The remaining candidates are turned into tag tokens via quotesToTags.
//   3. All collected tokens, including this newline (with its rank set to
//      quoteAndNewlineRank), are returned as { tokens: [...] }, the internal
//      state is reset and the 'any' handler is removed from the dispatcher.
//
// cb, frame and prevToken are accepted for signature compatibility with the
// other handlers but are not used here.
QuoteTransformer.prototype.onNewLine = function ( token, cb, frame, prevToken ) {
	var res,
		j,
		before,
		text,
		lastchar,
		secondtolastchar,
		firstsingleletterword = -1,
		firstmultiletterword = -1,
		firstspace = -1;

	if ( !this.isActive ) {
		// Nothing to do, quick abort.
		return { token: token };
	}

	token.rank = this.quoteAndNewlineRank;

	// balance out tokens, convert placeholders into tags
	if ( this.italics.length % 2 && this.bolds.length % 2 ) {
		for ( j = 0; j < this.bolds.length; j++ ) {
			// The token that preceded this bold quote in the stream. This
			// must be the quote's own recorded prevToken, not the prevToken
			// argument of this newline handler.
			before = this.bolds[j][0].prevToken;
			if ( !before ) {
				continue;
			}

			if ( before.type === 'TEXT' ) {
				text = before.value;
				lastchar = text[text.length - 1];
				secondtolastchar = text[text.length - 2];
				if ( lastchar === ' ' ) {
					if ( firstspace === -1 ) {
						firstspace = j;
					}
				} else if ( secondtolastchar === ' ' && firstsingleletterword === -1 ) {
					firstsingleletterword = j;
				} else if ( firstmultiletterword === -1 ) {
					firstmultiletterword = j;
				}
			} else if ( ( before.type === 'NEWLINE' || before.type === 'TAG' ) &&
				firstmultiletterword === -1 ) {
				// This is an approximation, as the original doQuotes operates
				// on the source and just looks at space vs. non-space. At
				// least some tags are thus recognized as words in the
				// original implementation.
				firstmultiletterword = j;
			}
		}

		// now see if we can convert a bold to an italic and an apostrophe
		if ( firstsingleletterword > -1 ) {
			this.convertBold( firstsingleletterword );
		} else if ( firstmultiletterword > -1 ) {
			this.convertBold( firstmultiletterword );
		} else if ( firstspace > -1 ) {
			this.convertBold( firstspace );
		}
	}

	this.quotesToTags( this.italics, 'i' );
	this.quotesToTags( this.bolds, 'b' );

	// The newline itself goes last, after any end tags quotesToTags appended.
	this.currentChunk.push( token );
	this._startNewChunk();

	// return all collected tokens including the newline
	res = { tokens: [].concat.apply( [], this.chunks ) };

	// prepare for next line
	this.reset();

	// remove 'any' registration
	this.dispatcher.removeTransform( this.anyRank, 'any' );

	return res;
};
|
|
|
|
|
2011-12-13 10:25:18 +00:00
|
|
|
// Convert the bold candidate at index i of this.bolds into an italic
// candidate, to balance an uneven number of both bold and italic quotes. In
// the process one tick of the bold quote is converted back to text: a literal
// "'" TEXT token is emitted directly in front of the quote.
//
// _startNewChunk stores in chunk.pos the index of the chunk that was archived
// immediately before the chunk, so the chunk preceding the quote is
// this.chunks[chunk.pos]; the apostrophe is appended to the end of that
// chunk. Only if no such chunk exists is the apostrophe prepended as a chunk
// of its own.
//
// The converted chunk is removed from this.bolds and inserted into
// this.italics, which is kept ordered by pos (i.e. stream order).
QuoteTransformer.prototype.convertBold = function ( i ) {
	var chunk = this.bolds[i],
		textToken = { type: 'TEXT', value: "'" },
		preceding = this.chunks[chunk.pos];

	if ( preceding ) {
		preceding.push( textToken );
	} else {
		// prepend another chunk
		this.chunks.unshift( [ textToken ] );
	}

	// delete from bolds
	this.bolds.splice( i, 1 );

	this.italics.push( chunk );
	this.italics.sort( function ( a, b ) { return a.pos - b.pos; } );
};
|
|
|
|
|
2011-12-13 10:25:18 +00:00
|
|
|
// Convert the quote candidates in `chunks` (this.italics or this.bolds) into
// tag tokens named `name`.
//
// Candidates alternate between opening and closing: the 1st, 3rd, ... become
// 'TAG' tokens and the 2nd, 4th, ... become 'ENDTAG' tokens. For each chunk
// the quote token found in the leading context (chunk[0].token) is modified
// in place (type and name set, value deleted) and then replaces the context
// as the chunk's first entry. If the last tag is left open, a matching ENDTAG
// token is appended to the current chunk.
QuoteTransformer.prototype.quotesToTags = function ( chunks, name ) {
	var idx, tagToken;

	for ( idx = 0; idx < chunks.length; idx++ ) {
		tagToken = chunks[idx][0].token;
		// even positions open a tag, odd positions close it
		tagToken.type = ( idx % 2 === 0 ) ? 'TAG' : 'ENDTAG';
		tagToken.name = name;
		delete tagToken.value;
		chunks[idx][0] = tagToken;
	}

	if ( chunks.length % 2 === 1 ) {
		// Add end tag
		this.currentChunk.push( { type: 'ENDTAG', name: name } );
	}
};
|
|
|
|
|
|
|
|
// Export the constructor when a CommonJS-style `module` object is present;
// otherwise QuoteTransformer is only available as a plain declaration.
if ( typeof module === 'object' ) {
	module.exports.QuoteTransformer = QuoteTransformer;
}
|