require('./core-upgrade.js');

var PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer;
var WikitextConstants = require('./mediawiki.wikitext.constants.js').WikitextConstants;
var Util = require('./mediawiki.Util.js').Util;

/**
 * Serializes a chunk of tokens or an HTML DOM to MediaWiki's wikitext flavor.
 *
 * @class
 * @constructor
 * @param options {Object} List of options for serialization
 */
WikitextSerializer = function( options ) {
	this.options = $.extend( {
		// defaults
	}, options || {} );
};

var WSP = WikitextSerializer.prototype;
/* *********************************************************************
 * Here is what the state attributes mean:
 *
 * listStack
 *    Stack of list contexts to let us emit wikitext for nested lists.
 *    Each context keeps track of 3 values:
 *    - itemBullet: the wikitext bullet char for this list
 *    - itemCount : # of list items encountered so far for the list
 *    - bullets   : cumulative bullet prefix based on all the lists
 *                  that enclose the current list
 *
 * onNewline
 *    true on start of file or after a new line has been emitted.
 *
 * onStartOfLine
 *    true when onNewline is true, and also in other start-of-line contexts.
 *    Ex: after a comment has been emitted, or after include/noinclude tokens.
 *
 * singleLineMode
 *    - if (> 0), we cannot emit any newlines.
 *    - this value changes as we enter/exit dom subtrees that require
 *      single-line wikitext output. WSP._tagHandlers specify single-line
 *      mode for individual tags.
 *
 * availableNewlineCount
 *    # of newlines that have been encountered so far but not emitted yet.
 *    Newlines are buffered till they need to be output. This lets us
 *    swallow newlines in contexts where they shouldn't be emitted for
 *    ensuring equivalent wikitext output. (ex dom: ..</li>\n\n</li>..)
 * ********************************************************************* */
WSP.initialState = {
	listStack: [],
	onNewline: true,
	onStartOfLine : true,
	availableNewlineCount: 0,
	singleLineMode: 0,
	tokens: []
};
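
// Illustrative sketch (not part of the original source): for nested wikitext
// lists such as
//
//   * outer item
//   ** nested item
//
// the listStack would roughly evolve as
//
//   [ { itemCount: 1, bullets: '*',  itemBullet: '' } ]   // outer <ul> + <li>
//   [ { ... },
//     { itemCount: 1, bullets: '**', itemBullet: '' } ]   // nested <ul> + <li>
//
// i.e. each nested list extends 'bullets' with its parent's cumulative prefix.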

WSP.escapeWikiText = function ( state, text ) {
	// tokenize the text
	var p = new PegTokenizer( state.env ),
		tokens = [];
	p.on('chunk', function ( chunk ) {
		//console.warn( JSON.stringify(chunk));
		tokens.push.apply( tokens, chunk );
	});
	p.on('end', function(){
		//console.warn( JSON.stringify('end'));
	});

	// this is synchronous for now, will still need sync version later, or
	// alternatively make text processing in the serializer async

	var prefixedText = text;
	var inNewlineContext = state.onNewline;
	if ( ! inNewlineContext ) {
		// Prefix '_' so that no start-of-line wiki syntax matches. Strip it from
		// the result.
		prefixedText = '_' + text;
	}

	if ( state.inIndentPre ) {
		prefixedText = prefixedText.replace(/(\r?\n)/g, '$1_');
	}

	// FIXME: parse using
	p.process( prefixedText );

	if ( ! inNewlineContext ) {
		// now strip the leading underscore.
		if ( tokens[0] === '_' ) {
			tokens.shift();
		} else {
			tokens[0] = tokens[0].substr(1);
		}
	}

	// state.inIndentPre is handled on the complete output

	// wrap any run of non-text tokens into <nowiki> tags using the source
	// offsets of top-level productions
	// return the updated text
	var outTexts = [],
		nonTextTokenAccum = [],
		cursor = 0;
	function wrapNonTextTokens () {
		if ( nonTextTokenAccum.length ) {
			var missingRangeEnd = false;
			// TODO: make sure the source positions are always set!
			// The start range
			var startRange = nonTextTokenAccum[0].dataAttribs.tsr,
				rangeStart, rangeEnd;
			if ( ! startRange ) {
				console.warn( 'No tsr on ' + nonTextTokenAccum[0] );
				rangeStart = cursor;
			} else {
				rangeStart = startRange[0];
				if ( ! inNewlineContext ) {
					// compensate for underscore.
					rangeStart--;
				}
				cursor = rangeStart;
			}

			var endRange = nonTextTokenAccum.last().dataAttribs.tsr;
			if ( ! endRange ) {
				// FIXME: improve this!
				//rangeEnd = state.env.tokensToString( tokens ).length;
				// Be conservative and extend the range to the end for now.
				// Alternatives: only extend it to the next token with range
				// info on it.
				missingRangeEnd = true;
				rangeEnd = text.length;
			} else {
				rangeEnd = endRange[1];
				if ( ! inNewlineContext ) {
					// compensate for underscore.
					rangeEnd--;
				}
			}

			var escapedSource = text.substr( rangeStart, rangeEnd - rangeStart )
									.replace( /<(\/?nowiki)>/g, '&lt;$1&gt;' );
			outTexts.push( '<nowiki>' );
			outTexts.push( escapedSource );
			outTexts.push( '</nowiki>' );
			cursor += 17 + escapedSource.length;
			if ( missingRangeEnd ) {
				throw 'No tsr on end token: ' + nonTextTokenAccum.last();
			}
			nonTextTokenAccum = [];
		}
	}

	try {
		for ( var i = 0, l = tokens.length; i < l; i++ ) {
			var token = tokens[i];
			switch ( token.constructor ) {
				case String:
					wrapNonTextTokens();
					outTexts.push(
						token
						// Angle brackets forming HTML tags are picked up as
						// tags and escaped with nowiki. Remaining angle
						// brackets can remain unescaped in the wikitext. They
						// are entity-escaped by the HTML5 DOM serializer when
						// outputting the HTML DOM.
						//.replace(/</g, '&lt;').replace(/>/g, '&gt;')
					);
					cursor += token.length;
					break;
				case NlTk:
					wrapNonTextTokens();
					outTexts.push( '\n' );
					cursor++;
					break;
				case EOFTk:
					wrapNonTextTokens();
					break;
				case TagTk:
				case SelfclosingTagTk:
					var argDict = state.env.KVtoHash( token.attribs );
					if ( argDict['typeof'] === 'mw:Placeholder' &&
							// XXX: move the decision whether to escape or not
							// into individual handlers!
							token.dataAttribs.src )
					{
						wrapNonTextTokens();
						// push out the original source
						// XXX: This assumes the content was not
						// modified for now.
						outTexts.push( token.dataAttribs.src
							// escape ampersands in entity text
							.replace(/&(#?[0-9a-zA-Z]{2,20};)/, '&amp;$1') );
						// skip generated tokens
						for ( ; i < l; i ++) {
							var tk = tokens[i];
							if ( tk.constructor === EndTagTk &&
									tk.name === token.name ) {
								break;
							}
						}
					} else {
						nonTextTokenAccum.push(token);
					}
					break;
				default:
					//console.warn('pushing ' + token);
					nonTextTokenAccum.push(token);
					break;
			}
		}
	} catch ( e ) {
		console.warn( e );
	}

	//console.warn( 'escaped wikiText: ' + outTexts.join('') );
	var res = outTexts.join('');
	if ( state.inIndentPre ) {
		return res.replace(/\n_/g, '\n');
	} else {
		return res;
	}
};
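
// Rough illustration (an assumption, not taken from the original source): a
// text node such as "[[Not a link]]" tokenizes into a wikilink, so the run of
// non-text tokens gets wrapped and the result is roughly
// "<nowiki>[[Not a link]]</nowiki>", while prose containing no wikitext
// syntax is returned unchanged.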

var id = function(v) {
	return function( state ) {
		return v;
	};
};

var installCollector = function ( collectorConstructor, cb, handler, state, token ) {
	state.tokenCollector = new collectorConstructor( token, cb, handler );
	return '';
};

var endTagMatchTokenCollector = function ( tk, cb ) {
	var tokens = [tk];

	return {
		cb: cb,
		collect: function ( state, token ) {
			tokens.push( token );
			if ( token.constructor === EndTagTk &&
					token.name === tk.name ) {
				// finish collection
				if ( this.cb ) {
					// abort further token processing since the cb handled it
					return this.cb( state, tokens );
				} else {
					// let a handler deal with token processing
					return false;
				}
			} else {
				// continue collection
				return true;
			}
		},
		tokens: tokens
	};
};
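
// Note (added for clarity): the 'a', 'span' and 'figure' handlers below use
// this collector to buffer all tokens up to the matching end tag and then
// hand the buffered token list to a callback such as WSP._linkHandler or
// WSP._figureHandler.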

var closeHeading = function(v) {
	return function(state, token) {
		var prevToken = state.prevToken;
		// Deal with empty headings. Ex: <h1></h1>
		if (prevToken.constructor === TagTk && prevToken.name === token.name) {
			return "<nowiki></nowiki>" + v;
		} else {
			return v;
		}
	};
};
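
// Rough example (assumption): an empty <h2></h2> serializes as
// "==<nowiki></nowiki>==" so that the heading round-trips instead of
// collapsing into an empty line, while a non-empty heading just gets the
// plain "==" ... "==" delimiters.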

function isListItem(token) {
	if (token.constructor !== TagTk) {
		return false;
	}

	var tokenName = token.name;
	return (tokenName === 'li' || tokenName === 'dt' || tokenName === 'dd');
}

WSP._listHandler = function( handler, bullet, state, token ) {
	if ( state.singleLineMode ) {
		state.singleLineMode--;
	}

	var bullets, res;
	var stack = state.listStack;
	if (stack.length === 0) {
		bullets = bullet;
		res = bullets;
		handler.startsNewline = true;
	} else {
		var curList = stack.last();
		//console.warn(JSON.stringify( stack ));
		bullets = curList.bullets + curList.itemBullet + bullet;
		curList.itemCount++;
		// A nested list, not directly after a list item
		if (curList.itemCount > 1 && !isListItem(state.prevToken)) {
			res = bullets;
			handler.startsNewline = true;
		} else {
			res = bullet;
			handler.startsNewline = false;
		}
	}
	stack.push({ itemCount: 0, bullets: bullets, itemBullet: ''});
	state.env.dp('lh res', bullets, res, handler );
	return res;
};

WSP._listEndHandler = function( state, token ) {
	state.listStack.pop();
	return '';
};

WSP._listItemHandler = function ( handler, bullet, state, token ) {

	function isRepeatToken(state, token) {
		return state.prevToken.constructor === EndTagTk &&
				state.prevToken.name === token.name;
	}

	function isMultiLineDtDdPair(state, token) {
		return token.name === 'dd' &&
				token.dataAttribs.stx !== 'row' &&
				state.prevTagToken.constructor === EndTagTk &&
				state.prevTagToken.name === 'dt';
	}

	var stack = state.listStack;

	// This check is required to handle cases where the DOM is not well-formed.
	//
	// FIXME NOTE: This is required currently to deal with bugs in the parser
	// as it deals with complex cases. But, in the future, we could deal with
	// this in one of the following ways:
	// (a) The serializer expects a well-formed DOM and all cleanup will be
	//     done as part of external tools/passes.
	// (b) The serializer supports a small set of exceptional cases and bare
	//     list items could be one of them
	// (c) The serializer ought to handle any DOM that is thrown at it.
	//
	// Yet to be resolved.
	if (stack.length === 0) {
		stack.push({ itemCount: 0, bullets: bullet, itemBullet: bullet});
	}

	var curList = stack[stack.length - 1];
	curList.itemCount++;
	curList.itemBullet = bullet;

	// Output bullet prefix only if:
	// - this is not the first list item
	// - we are either in:
	//   * a new line context,
	//   * seeing an identical token as the last one (..</li><li>...)
	//     (since we are in this handler on encountering a list item token,
	//      this means we are the 2nd or later item in the list, BUT without
	//      any intervening new lines or other tokens in between)
	//   * on the dd part of a multi-line dt-dd pair
	//     (The dd on a single-line dt-dd pair sticks to the dt.
	//      which means it won't get the bullets that the dt already got).
	//
	// SSS FIXME: This condition could be rephrased as:
	//
	// if (isRepeatToken(state, token) ||
	//     (curList.itemCount > 1 && (inStartOfLineContext(state) || isMultiLineDtDdPair(state, token))))
	//
	var res;
	if (curList.itemCount > 1 &&
		( state.onStartOfLine ||
			isRepeatToken(state, token) ||
			isMultiLineDtDdPair(state, token)
		)
	)
	{
		handler.startsNewline = true;
		res = curList.bullets + bullet;
	} else {
		handler.startsNewline = false;
		res = bullet;
	}
	state.env.dp( 'lih', token, res, handler );
	return res;
};
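
// Rough example (assumption): for definition lists, "dt" items get the ';'
// bullet and "dd" items the ':' bullet (see the dt/dd handlers below). A dd
// that follows its dt on the same line sticks to it (";term:definition"),
// while the dd of a multi-line dt-dd pair starts a new line with the
// cumulative bullet prefix (":definition").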

WSP._figureHandler = function ( state, figTokens ) {

	// skip tokens looking for the image tag
	var img;
	var i = 1, n = figTokens.length;
	while (i < n) {
		if (figTokens[i].name === "img") {
			img = figTokens[i];
			break;
		}
		i++;
	}

	// skip tokens looking for the start and end caption tags
	var fcStartIndex = 0, fcEndIndex = 0;
	while (i < n) {
		if (figTokens[i].name === "figcaption") {
			if (fcStartIndex > 0) {
				fcEndIndex = i;
				break;
			} else {
				fcStartIndex = i;
			}
		}
		i++;
	}

	// Call the serializer to build the caption
	var caption = state.serializer.serializeTokens(figTokens.slice(fcStartIndex+1, fcEndIndex)).join('');

	// Get the image resource name
	// FIXME: file name has been capitalized -- need some fix in the parser
	var argDict = state.env.KVtoHash( img.attribs );
	var imgR = argDict.resource.replace(/(^\[:)|(\]$)/g, '');

	// Now, build the complete wikitext for the figure
	var outBits = [imgR];
	var figToken = figTokens[0];
	var figAttrs = figToken.dataAttribs.optionList;

	var simpleImgOptions = WikitextConstants.Image.SimpleOptions;
	var prefixImgOptions = WikitextConstants.Image.PrefixOptions;
	var sizeOptions = { "width": 1, "height": 1};
	var size = {};
	for (i = 0, n = figAttrs.length; i < n; i++) {
		var a = figAttrs[i];
		var k = a.k, v = a.v;
		if (sizeOptions[k]) {
			size[k] = v;
		} else {
			// Output size first and clear it
			var w = size.width;
			if (w) {
				outBits.push(w + (size.height ? "x" + size.height : '') + "px");
				size.width = null;
			}

			if (k === "aspect") {
				// SSS: Bad Hack! Need a better solution
				// One solution is to search through prefix options hash but seems ugly.
				// Another is to flip prefix options hash and use it to search.
				if (v) {
					outBits.push("upright=" + v);
				} else {
					outBits.push("upright");
				}
			} else if (simpleImgOptions[v.trim()] === k) {
				// The values and keys in the parser attributes are a flip
				// of how they are in the wikitext constants image hash
				// Hence the indexing by 'v' instead of 'k'
				outBits.push(v);
			} else if (prefixImgOptions[k.trim()]) {
				outBits.push(k + "=" + v);
			} else {
				console.warn("Unknown image option encountered: " + JSON.stringify(a));
			}
		}
	}
	if (caption) {
		outBits.push(caption);
	}

	return "[[" + outBits.join('|') + "]]";
};
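
// Rough example (assumption): a figure whose img resource is "File:Foo.jpg"
// with a 200px width option and the caption "A caption" would come out as
// "[[File:Foo.jpg|200px|A caption]]" -- resource first, then options, then
// the serialized caption, all joined with '|'.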

WSP._serializeTableTag = function ( symbol, optionEndSymbol, state, token ) {
	if ( token.attribs.length ) {
		return symbol + ' ' + WSP._serializeAttributes( token.attribs ) + optionEndSymbol;
	} else {
		return symbol;
	}
};
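
// Rough example (assumption): for a table start token carrying
// class="wikitable" this yields '{| class="wikitable"', while an
// attribute-less table start is just '{|'. Cell symbols like '|' and '!'
// additionally pass ' |' as the option end symbol so that cell attributes are
// closed off correctly.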

WSP._emptyTags = { br: true, meta: true };

WSP._serializeHTMLTag = function ( state, token ) {
	var close = '';
	if ( WSP._emptyTags[ token.name ] ) {
		close = '/';
	}

	if ( token.name === 'pre' ) {
		// html-syntax pre is very similar to nowiki
		state.inHTMLPre = true;
	}

	if ( token.attribs.length ) {
		return '<' + token.name + ' ' +
			WSP._serializeAttributes( token.attribs ) + close + '>';
	} else {
		return '<' + token.name + close + '>';
	}
};

WSP._serializeHTMLEndTag = function ( state, token ) {
	if ( token.name === 'pre' ) {
		state.inHTMLPre = false;
	}
	if ( ! WSP._emptyTags[ token.name ] ) {
		return '</' + token.name + '>';
	} else {
		return '';
	}
};

WSP._linkHandler = function( state, tokens ) {
	//return '[[';
	// TODO: handle internal/external links etc using RDFa and dataAttribs
	// Also convert unannotated html links without advanced attributes to
	// external wiki links for html import. Might want to consider converting
	// relative links without path component and file extension to wiki links.

	var env = state.env,
		token = tokens.shift(),
		endToken = tokens.pop(),
		attribDict = env.KVtoHash( token.attribs );
	if ( attribDict.rel && attribDict.rel.match( /\bmw:/ ) &&
			attribDict.href !== undefined )
	{
		// we have a rel starting with mw: prefix and href
		var tokenData = token.dataAttribs;
		if ( attribDict.rel === 'mw:WikiLink' ) {
			var base = env.wgScriptPath,
				hrefInfo = token.getAttributeShadowInfo( 'href' ),
				target = hrefInfo.value,
				tail = '',
				prefix = target.substr(0, base.length);
			if ( prefix === base) {
				target = target.substr(base.length);
			}

			if ( hrefInfo.modified ) {
				// there was no rt info or the href was modified: normalize it
				target = target.replace( /_/g, ' ' );
				tail = '';
			} else {
				tail = tokenData.tail || '';
			}

			var unencodedTarget = target;

			// Escape anything that looks like percent encoding, since we
			// decode the wikitext
			target = target.replace( /%(?=[a-f\d]{2})/g, '%25' );

			// If the normalized link text is the same as the normalized
			// target and the link was either modified or not originally a
			// piped link, serialize to a simple link.
			// TODO: implement

			var linkText = state.env.tokensToString( tokens, true );

			//env.ap( linkText, target );
			if ( linkText.constructor === String &&
					env.normalizeTitle( Util.stripSuffix( linkText, tail ) ) ===
						env.normalizeTitle( unencodedTarget ) &&
					( hrefInfo.modified || token.dataAttribs.stx === 'simple' ) )
			{
				return '[[' + target + ']]' + tail;
			} else {
				var content = state.serializer.serializeTokens( tokens ).join('');
				content = Util.stripSuffix( content, tail );
				return '[[' + target + '|' + content + ']]' + tail;
			}
		} else if ( attribDict.rel === 'mw:ExtLink' ) {
			return '[' + attribDict.href + ' ' +
				state.serializer.serializeTokens( tokens ).join('') +
				']';
		} else if ( attribDict.rel === 'mw:UrlLink' ) {
			return attribDict.href;
		} else if ( attribDict.rel === 'mw:NumberedExtLink' ) {
			return '[' + attribDict.href + ']';
		} else if ( attribDict.rel === 'mw:Image' ) {
			// simple source-based round-tripping for now..
			// TODO: properly implement!
			if ( token.dataAttribs.src ) {
				return token.dataAttribs.src;
			}
		} else {
			// Unknown rel was set
			return WSP._serializeHTMLTag( state, token );
		}
	} else {
		// TODO: default to extlink for simple links with unknown rel set
		// switch to html only when needed to support attributes

		var isComplexLink = function ( attribDict ) {
			for ( var name in attribDict ) {
				if ( name && ! ( name in { href: 1 } ) ) {
					return true;
				}
			}
			return false;
		};

		if ( true || isComplexLink ( attribDict ) ) {
			// Complex attributes we can't support in wiki syntax
			return WSP._serializeHTMLTag( state, token ) +
				state.serializer.serializeTokens( tokens ) +
				WSP._serializeHTMLEndTag( state, endToken );
		} else {
			// TODO: serialize as external wikilink
			return '';
		}

	}

	//if ( rtinfo.type === 'wikilink' ) {
	//	return '[[' + rtinfo.target + ']]';
	//} else {
	//	// external link
	//	return '[' + rtinfo.
};
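
// Rough examples (assumptions): an mw:WikiLink whose link text matches its
// target serializes as the simple form "[[Foo]]" (plus any trailing tail,
// e.g. "[[Foo]]s"), a non-matching text gives the piped form "[[Foo|bar]]",
// an mw:ExtLink gives "[http://example.org label]", and an
// mw:NumberedExtLink gives "[http://example.org]".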

WSP.genContentSpanTypes = { 'mw:Nowiki':1, 'mw:Entity': 1 };

/**
 * Compare the actual content with the original content and use
 * dataAttribs.src if it matches. Return serialization of modified content
 * otherwise.
 */
WSP.compareSourceHandler = function ( state, tokens ) {
	var token = tokens.shift(),
		lastToken = tokens.pop(),
		content = state.env.tokensToString( tokens, true );
	if ( content.constructor !== String ) {
		return state.serializer.serializeTokens( tokens ).join('');
	} else if ( content === token.dataAttribs.srcContent ) {
		return token.dataAttribs.src;
	} else {
		return content;
	}
};

/* *********************************************************************
 * startsNewline
 *     if true, the wikitext for the dom subtree rooted
 *     at this html tag requires a new line context.
 *
 * endsLine
 *     if true, the wikitext for the dom subtree rooted
 *     at this html tag ends the line.
 *
 * pairSepNLCount
 *     # of new lines required between wikitext for dom siblings
 *     of the same tag type (..</p><p>.., etc.)
 *
 * newlineTransparent
 *     if true, this token does not change the newline status
 *     after it is emitted.
 *
 * singleLine
 *     if 1, the wikitext for the dom subtree rooted at this html tag
 *     requires all content to be emitted on the same line without
 *     any line breaks. +1 sets the single-line mode (on descending
 *     the dom subtree), -1 clears the single-line mode (on exiting
 *     the dom subtree).
 *
 * ignore
 *     if true, the serializer pretends as if it never saw this token.
 * ********************************************************************* */
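
// For illustration (a sketch, not part of the original file): a typical
// handler entry in the table below has the shape
//
//   ul: {
//     start: { startsNewline: true, pairSepNLCount: 2, handle: ... },
//     end:   { endsLine: true, handle: WSP._listEndHandler }
//   }
//
// where 'handle' returns the wikitext to emit and the flags drive the
// newline bookkeeping described above.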
WSP.tagHandlers = {
	body: {
		end: {
			handle: function(state, token) {
				// swallow trailing new line
				state.emitNewlineOnNextToken = false;
				return '';
			}
		}
	},
	ul: {
		start: {
			startsNewline : true,
			handle: function ( state, token ) {
				return WSP._listHandler( this, '*', state, token );
			},
			pairSepNLCount: 2,
			newlineTransparent: true
		},
		end: {
			endsLine: true,
			handle: WSP._listEndHandler
		}
	},
	ol: {
		start: {
			startsNewline : true,
			handle: function ( state, token ) {
				return WSP._listHandler( this, '#', state, token );
			},
			pairSepNLCount: 2,
			newlineTransparent: true
		},
		end: {
			endsLine : true,
			handle: WSP._listEndHandler
		}
	},
	dl: {
		start: {
			startsNewline : true,
			handle: function ( state, token ) {
				return WSP._listHandler( this, '', state, token );
			},
			pairSepNLCount: 2
		},
		end: {
			endsLine: true,
			handle: WSP._listEndHandler
		}
	},
	li: {
		start: {
			handle: function ( state, token ) {
				return WSP._listItemHandler( this, '', state, token );
			},
			singleLine: 1,
			pairSepNLCount: 1
		},
		end: {
			singleLine: -1
		}
	},
	// XXX: handle single-line vs. multi-line dls etc
	dt: {
		start: {
			singleLine: 1,
			handle: function ( state, token ) {
				return WSP._listItemHandler( this, ';', state, token );
			},
			pairSepNLCount: 1,
			newlineTransparent: true
		},
		end: {
			singleLine: -1
		}
	},
	dd: {
		start: {
			singleLine: 1,
			handle: function ( state, token ) {
				return WSP._listItemHandler( this, ':', state, token );
			},
			pairSepNLCount: 1,
			newlineTransparent: true
		},
		end: {
			endsLine: true,
			singleLine: -1
		}
	},
	// XXX: handle options
	table: {
		start: {
			handle: WSP._serializeTableTag.bind(null, "{|", '')
		},
		end: {
			handle: function(state, token) {
				if ( state.prevTagToken && state.prevTagToken.name === 'tr' ) {
					this.startsNewline = true;
				} else {
					this.startsNewline = false;
				}
				return "|}";
			}
		}
	},
	tbody: { start: { ignore: true }, end: { ignore: true } },
	th: {
		start: {
			handle: function ( state, token ) {
				if ( token.dataAttribs.stx_v === 'row' ) {
					this.startsNewline = false;
					return WSP._serializeTableTag("!!", ' |', state, token);
				} else {
					this.startsNewline = true;
					return WSP._serializeTableTag( "!", ' |', state, token);
				}
			}
		}
	},
	tr: {
		start: {
			handle: function ( state, token ) {
				if ( state.prevToken.constructor === TagTk && state.prevToken.name === 'tbody' ) {
					// Omit for first row in a table. XXX: support optional trs
					// for first line (in source wikitext) too using some flag in
					// data-parsoid (stx: 'wikitext' ?)
					return '';
				} else {
					return WSP._serializeTableTag("|-", '', state, token );
				}
			},
			startsNewline: true
		}
	},
	td: {
		start: {
			handle: function ( state, token ) {
				if ( token.dataAttribs.stx_v === 'row' ) {
					this.startsNewline = false;
					return WSP._serializeTableTag("||", ' |', state, token);
				} else {
					this.startsNewline = true;
					return WSP._serializeTableTag("|", ' |', state, token);
				}
			}
		}
	},
	caption: {
		start: {
			startsNewline: true,
			handle: WSP._serializeTableTag.bind(null, "|+", ' |')
		}
	},
	p: {
		make: function(state, token) {
			// "stx": "html" tags never get here
			// Special case handling in a list context
			// VE embeds list content in paragraph tags.
			//
			// SSS FIXME: This will *NOT* work if the list item has nested paragraph tags!
			var prevToken = state.prevToken;
			if ( token.attribs.length === 0 &&
					( (state.listStack.length > 0 && isListItem(prevToken)) ||
						(prevToken.constructor === TagTk && prevToken.name === 'td') ||
						(state.ignorePTag && token.constructor === EndTagTk)))
			{
				state.ignorePTag = !state.ignorePTag;
				return { start: { ignore: true }, end: { ignore: true } };
			} else {
				return state.singleLineMode ? WSP.defaultHTMLTagHandler : this;
			}
		},
		start: {
			startsNewline : true,
			pairSepNLCount: 2
		},
		end: {
			endsLine: true
		}
	},
	// XXX: support indent variant instead by registering a newline handler?
	pre: {
		start: {
			startsNewline: true,
			pairSepNLCount: 2,
			handle: function( state, token ) {
				state.inIndentPre = true;
				state.textHandler = function( t ) {
					return t.replace(/\n/g, '\n ' );
				};
				return ' ';
			}
		},
		end: {
			endsLine: true,
			handle: function( state, token) {
				state.inIndentPre = false;
				state.textHandler = null;
				return '';
			}
		}
	},
	meta: {
		start: {
			handle: function ( state, token ) {
				var argDict = state.env.KVtoHash( token.attribs );
				if ( argDict['typeof'] === 'mw:tag' ) {
					// we use this currently for nowiki and noinclude & co
					this.newlineTransparent = true;
					if ( argDict.content === 'nowiki' ) {
						state.inNoWiki = true;
					} else if ( argDict.content === '/nowiki' ) {
						state.inNoWiki = false;
					} else {
						console.warn( JSON.stringify( argDict ) );
					}
					return '<' + argDict.content + '>';
				} else if ( argDict['typeof'] === 'mw:noinclude' ) {
					this.newlineTransparent = true;
					if ( token.dataAttribs.src === '<noinclude>' ) {
						return '<noinclude>';
					} else {
						return '</noinclude>';
					}
				} else {
					this.newlineTransparent = false;
					return WSP._serializeHTMLTag( state, token );
				}
			}
		}
	},
	span: {
		start: {
			handle: function( state, token ) {
				var argDict = state.env.KVtoHash( token.attribs );
				if ( argDict['typeof'] in WSP.genContentSpanTypes ) {
					if ( argDict['typeof'] === 'mw:Nowiki' ) {
						state.inNoWiki = true;
						return '<nowiki>';
					} else if ( token.dataAttribs.src ) {
						// FIXME: compare content with original content
						return installCollector(
							endTagMatchTokenCollector,
							WSP.compareSourceHandler,
							this,
							state, token
						);
					}
				} else {
					// Fall back to plain HTML serialization for spans created
					// by the editor
					return WSP._serializeHTMLTag( state, token );
				}
			}
		},
		end: {
			handle: function ( state, token ) {
				var argDict = state.env.KVtoHash( token.attribs );
				if ( argDict['typeof'] in WSP.genContentSpanTypes ) {
					if ( argDict['typeof'] === 'mw:Nowiki' ) {
						state.inNoWiki = false;
						return '</nowiki>';
					}
				} else {
					// Fall back to plain HTML serialization for spans created
					// by the editor
					return WSP._serializeHTMLEndTag( state, token );
				}
			}
		}
	},
	figure: {
		start: {
			handle: function ( state, token ) {
				state.tokenCollector = endTagMatchTokenCollector( token, WSP._figureHandler );
				// Set the handler- not terribly useful since this one doesn't
				// have any flags, but still useful for general testing
				state.tokenCollector.handler = this;
				return '';
			}
		}
	},
	hr: {
		start: {
			startsNewline: true,
			endsLine: true,
			handle: function(state, token) {
				var extra_dashes = token.dataAttribs.extra_dashes;
				if (extra_dashes && (extra_dashes > 0)) {
					var buf = ["----"];
					for (var i = 0; i < extra_dashes; i++) {
						buf.push("-");
					}
					return buf.join('');
				} else {
					// extra_dashes undefined or zero: the hr was exactly 4 dashes
					return "----";
				}
			}
		}
	},
	h1: {
		start: { startsNewline: true, handle: id("="), defaultStartNewlineCount: 2 },
		end: { endsLine: true, handle: closeHeading("=") }
	},
	h2: {
		start: { startsNewline: true, handle: id("=="), defaultStartNewlineCount: 2 },
		end: { endsLine: true, handle: closeHeading("==") }
	},
	h3: {
		start: { startsNewline: true, handle: id("==="), defaultStartNewlineCount: 2 },
		end: { endsLine: true, handle: closeHeading("===") }
	},
	h4: {
		start: { startsNewline: true, handle: id("===="), defaultStartNewlineCount: 2 },
		end: { endsLine: true, handle: closeHeading("====") }
	},
	h5: {
		start: { startsNewline: true, handle: id("====="), defaultStartNewlineCount: 2 },
		end: { endsLine: true, handle: closeHeading("=====") }
	},
	h6: {
		start: { startsNewline: true, handle: id("======"), defaultStartNewlineCount: 2 },
		end: { endsLine: true, handle: closeHeading("======") }
	},
	br: {
		start: {
			startsNewline: true,
			endsLine: true,
			handle: id("")
		}
	},
	b: {
		start: { handle: id("'''") },
		end: { handle: id("'''") }
	},
	i: {
		start: { handle: id("''") },
		end: { handle: id("''") }
	},
	a: {
		start: {
			handle: installCollector.bind(null,
					endTagMatchTokenCollector,
					WSP._linkHandler,
					this
				)
		}
	}
};

WSP._serializeAttributes = function ( attribs ) {
	var out = [];
	for ( var i = 0, l = attribs.length; i < l; i++ ) {
		var kv = attribs[i];
		if (kv.k.length) {
			if ( kv.v.length ) {
				out.push( kv.k + '=' +
						'"' + kv.v.replace( '"', '&quot;' ) + '"');
			} else {
				out.push( kv.k );
			}
		} else if ( kv.v.length ) {
			// not very likely..
			out.push( kv.v );
		}
	}
	// XXX: round-trip optional whitespace / line breaks etc
	return out.join(' ');
};
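
// Rough example (assumption): an attribute list like
//   [ { k: 'class', v: 'foo' }, { k: 'id', v: 'bar' } ]
// serializes to the string 'class="foo" id="bar"'.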

/**
 * Serialize a chunk of tokens
 */
WSP.serializeTokens = function( tokens, chunkCB ) {
	var state = $.extend({}, this.initialState, this.options),
		i, l;
	state.serializer = this;
	if ( chunkCB === undefined ) {
		var out = [];
		state.chunkCB = out.push.bind(out);
		for ( i = 0, l = tokens.length; i < l; i++ ) {
			this._serializeToken( state, tokens[i] );
		}
		return out;
	} else {
		state.chunkCB = chunkCB;
		for ( i = 0, l = tokens.length; i < l; i++ ) {
			this._serializeToken( state, tokens[i] );
		}
	}
};
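
// Illustrative usage (a sketch, assuming a token stream is already at hand):
//
//   var serializer = new WikitextSerializer();
//   var wikitext = serializer.serializeTokens( tokens ).join('');
//
// Without a chunkCB the serialized chunks are collected into an array and
// returned; with a chunkCB each chunk is passed to the callback instead.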

WSP.defaultHTMLTagHandler = {
	start: { isNewlineEquivalent: true, handle: WSP._serializeHTMLTag },
	end : { isNewlineEquivalent: true, handle: WSP._serializeHTMLEndTag }
};

WSP._getTokenHandler = function(state, token) {
	var handler;
	if ( token.dataAttribs.src !== undefined &&
			Util.lookup( token.attribs, 'typeof' ) === 'mw:Placeholder' ) {
		// implement generic src round-tripping:
		// return src, and drop the generated content
		if ( token.constructor === TagTk ) {
			state.tokenCollector = endTagMatchTokenCollector( token );
			return { handle: id( token.dataAttribs.src ) };
		} else if ( token.constructor === SelfclosingTagTk ) {
			return { handle: id( token.dataAttribs.src ) };
		} else { // EndTagTk
			state.tokenCollector = null;
			return { handle: id('') };
		}
	} else if (token.dataAttribs.stx === 'html') {
		handler = this.defaultHTMLTagHandler;
	} else {
		var tname = token.name;
		handler = this.tagHandlers[tname];
		if ( handler && handler.make ) {
			handler = handler.make(state, token);
		}
	}

	if ( ! handler ) {
		handler = this.defaultHTMLTagHandler;
	}
	if ( token.constructor === TagTk || token.constructor === SelfclosingTagTk ) {
		return handler.start || {};
	} else {
		return handler.end || {};
	}
};
|
|
|
|
|
2012-05-17 11:20:39 +00:00
|
|
|
/**
|
|
|
|
* Serialize a token.
|
|
|
|
*/
|
2012-05-21 12:46:07 +00:00
|
|
|
WSP._serializeToken = function ( state, token ) {
|
2012-07-16 23:17:12 +00:00
|
|
|
var res = '',
|
|
|
|
collectorResult = false,
|
2012-07-27 00:27:37 +00:00
|
|
|
handler = {},
|
2012-07-16 23:17:12 +00:00
|
|
|
dropContent = state.dropContent;
|
|
|
|
|
2012-07-13 20:57:04 +00:00
|
|
|
if (state.tokenCollector) {
|
2012-07-18 23:19:52 +00:00
|
|
|
collectorResult = state.tokenCollector.collect( state, token );
|
2012-07-16 23:17:12 +00:00
|
|
|
if ( collectorResult === true ) {
|
2012-07-13 20:57:04 +00:00
|
|
|
// continue collecting
|
2012-06-28 04:43:35 +00:00
|
|
|
return;
|
2012-07-16 23:17:12 +00:00
|
|
|
} else if ( collectorResult !== false ) {
|
|
|
|
res = collectorResult;
|
|
|
|
if ( state.tokenCollector.handler ) {
|
|
|
|
handler = state.tokenCollector.handler;
|
|
|
|
}
|
|
|
|
state.tokenCollector = null;
|
2012-06-28 04:43:35 +00:00
|
|
|
}
|
2012-07-27 00:27:37 +00:00
|
|
|
}
|
2012-06-28 04:43:35 +00:00
|
|
|
|
2012-07-16 23:17:12 +00:00
|
|
|
if ( collectorResult === false ) {
|
2012-06-14 05:10:51 +00:00
|
|
|
|
|
|
|
|
2012-07-16 23:17:12 +00:00
|
|
|
state.prevToken = state.curToken;
|
|
|
|
state.curToken = token;
|
2012-06-18 15:18:20 +00:00
|
|
|
|
2012-07-16 23:17:12 +00:00
|
|
|
// The serializer is logically in a new line context if a new line is pending
|
|
|
|
if (state.emitNewlineOnNextToken || (state.availableNewlineCount > 0)) {
|
|
|
|
state.onNewline = true;
|
|
|
|
state.onStartOfLine = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch( token.constructor ) {
|
|
|
|
case TagTk:
|
|
|
|
case SelfclosingTagTk:
|
|
|
|
handler = WSP._getTokenHandler( state, token );
|
|
|
|
if ( ! handler.ignore ) {
|
|
|
|
state.prevTagToken = state.currTagToken;
|
|
|
|
state.currTagToken = token;
|
|
|
|
res = handler.handle ? handler.handle( state, token ) : '';
|
2012-06-18 15:18:20 +00:00
|
|
|
}
|
2012-07-16 23:17:12 +00:00
|
|
|
break;
|
|
|
|
case EndTagTk:
|
|
|
|
handler = WSP._getTokenHandler( state, token );
|
|
|
|
if ( ! handler.ignore ) {
|
|
|
|
state.prevTagToken = state.currTagToken;
|
|
|
|
state.currTagToken = token;
|
|
|
|
if ( handler.singleLine < 0 && state.singleLineMode ) {
|
|
|
|
state.singleLineMode--;
|
|
|
|
}
|
|
|
|
res = handler.handle ? handler.handle( state, token ) : '';
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case String:
|
2012-07-27 00:27:37 +00:00
|
|
|
res = ( state.inNoWiki || state.inHTMLPre ) ? token
|
2012-07-16 23:17:12 +00:00
|
|
|
: this.escapeWikiText( state, token );
|
|
|
|
res = state.textHandler ? state.textHandler( res ) : res;
|
|
|
|
break;
|
|
|
|
			case CommentTk:
				res = '<!--' + token.value + '-->';
				// don't consider comments for changes of the onStartOfLine status
				// XXX: convert all non-tag handlers to a similar handler
				// structure as tags?
				handler = { newlineTransparent: true };
				break;
			case NlTk:
				res = '\n';
				res = state.textHandler ? state.textHandler( res ) : res;
				break;
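			// End of input: flush any newlines still buffered in
			// state.availableNewlineCount so trailing newlines are not lost.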
			case EOFTk:
				res = '';
				for ( var i = 0, l = state.availableNewlineCount; i < l; i++ ) {
					res += '\n';
				}
				state.chunkCB(res);
				break;
			default:
				res = '';
				console.warn( 'Unhandled token type ' + JSON.stringify( token ) );
				break;
		}
	}

	if (! dropContent || ! state.dropContent ) {
		var newTrailingNLCount = 0;
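		// Newline bookkeeping: leading and trailing newlines are stripped
		// from the handler output and tracked in state.availableNewlineCount;
		// they are only written out later, in front of the next non-empty
		// chunk, once we know how many are actually needed there.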
		if (res !== '') {
			// Strip leading or trailing newlines from the returned string
			var match = res.match( /^((?:\r?\n)*)((?:.*?|[\r\n]+[^\r\n])*?)((?:\r?\n)*)$/ ),
				leadingNLs = match[1],
				trailingNLs = match[3];

			if (leadingNLs === res) {
				// all newlines, accumulate count, and clear output
				state.availableNewlineCount += leadingNLs.replace(/\r\n/g, '\n').length;
				res = "";
			} else {
				newTrailingNLCount = trailingNLs.replace(/\r\n/g, '\n').length;
				if ( leadingNLs !== '' ) {
					state.availableNewlineCount += leadingNLs.replace(/\r\n/g, '\n').length;
				}
				// strip newlines
				res = match[2];
			}
		}

		// Check if we have a pair of identical tag tokens </p><p>; </ul><ul>; etc.
		// that have to be separated by extra newlines and add those in.
		if (handler.pairSepNLCount && state.prevTagToken &&
				state.prevTagToken.constructor === EndTagTk &&
				state.prevTagToken.name === token.name )
		{
			if ( state.availableNewlineCount < handler.pairSepNLCount) {
				state.availableNewlineCount = handler.pairSepNLCount;
			}
		}

		if (state.env.debug) {
			console.warn(token +
					", res: " + JSON.stringify( res ) +
					", nl: " + state.onNewline +
					", sol: " + state.onStartOfLine +
					', eon:' + state.emitNewlineOnNextToken +
					", #nl: " + state.availableNewlineCount +
					', #new:' + newTrailingNLCount);
		}

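		// Emit the chunk: allocate an implicit starting newline if the
		// handler calls for one, prepend the buffered newlines, collapse
		// newlines to spaces in single-line mode, and hand the result to
		// state.chunkCB.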
		if (res !== '') {
			var out = '';
			// If this is not an HTML tag and the serializer is not in single-line mode,
			// allocate a newline if
			// - the previous token requested a newline before the next content
			//   (emitNewlineOnNextToken) and this chunk is not just whitespace, or
			// - the handler starts a new line and we aren't on a new line.
			//
			// Newline-equivalent tokens (HTML tags for example) don't get
			// implicit newlines.
			if (!handler.isNewlineEquivalent &&
					!state.singleLineMode &&
					!state.availableNewlineCount &&
					((!res.match(/^\s*$/) && state.emitNewlineOnNextToken) ||
					(!state.onStartOfLine && handler.startsNewline)))
			{
				state.availableNewlineCount = handler.defaultStartNewlineCount || 1;
			}

			// Add required # of new lines in the beginning
			for (; state.availableNewlineCount; state.availableNewlineCount--) {
				out += '\n';
			}

			if ( state.singleLineMode ) {
				res = res.replace(/\n/g, ' ');
			}
			out += res;
			state.env.dp(' =>', out);
			state.chunkCB( out );

			// Update new line state
			// 1. If this token generated new trailing new lines, we are in a
			//    newline state again. If not, we are not! But, handle
			//    onStartOfLine specially.
			if (newTrailingNLCount > 0) {
				state.availableNewlineCount = newTrailingNLCount;
				state.onNewline = true;
				state.onStartOfLine = true;
			} else {
				state.availableNewlineCount = 0;
				state.onNewline = false;
				if (!handler.newlineTransparent) {
					state.onStartOfLine = false;
				}
			}

			// 2. Previous token nl state is no longer relevant
			state.emitNewlineOnNextToken = false;
		} else if ( handler.startsNewline && !state.onStartOfLine ) {
			state.emitNewlineOnNextToken = true;
		}

		if (handler.endsLine) {
			// Record end of line
			state.emitNewlineOnNextToken = true;
		}
		if ( handler.singleLine > 0 ) {
			state.singleLineMode += handler.singleLine;
		}
	}
};

/**
 * Serialize an HTML DOM document.
 *
 * If no chunkCB is supplied, the serialized wikitext is collected and
 * returned as a single string; otherwise each chunk is passed to chunkCB as
 * it is produced and nothing is returned.
 */
WSP.serializeDOM = function( node, chunkCB ) {
	try {
		var state = $.extend({}, this.initialState, this.options);
		state.serializer = this;
		//console.warn( node.innerHTML );
		if ( ! chunkCB ) {
			var out = [];
			state.chunkCB = out.push.bind( out );
			this._serializeDOM( node, state );
			this._serializeToken( state, new EOFTk() );
			return out.join('');
		} else {
			state.chunkCB = chunkCB;
			this._serializeDOM( node, state );
			this._serializeToken( state, new EOFTk() );
		}
	} catch (e) {
		console.warn(e.stack);
	}
};
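
// Example usage (a sketch, not part of this module): assumes `env` is a
// Parsoid environment object (the serializer relies on state.env for
// tokenizing and debug output) and `doc` is a DOM document built from
// Parsoid-generated HTML, e.g. via jsdom:
//
//   var serializer = new WikitextSerializer( { env: env } );
//   // Collect the complete wikitext as one string:
//   var wikitext = serializer.serializeDOM( doc.body );
//   // Or stream chunks as they are produced:
//   serializer.serializeDOM( doc.body, function ( chunk ) {
//       process.stdout.write( chunk );
//   } );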

/**
 * Internal worker. Recursively serialize a DOM subtree by creating tokens and
 * calling _serializeToken on each of these.
 */
WSP._serializeDOM = function( node, state ) {
	// serialize this node
	switch( node.nodeType ) {
		case Node.ELEMENT_NODE:
			//console.warn( node.nodeName.toLowerCase() );
			var children = node.childNodes,
				name = node.nodeName.toLowerCase(),
				tkAttribs = this._getDOMAttribs(node.attributes),
				tkRTInfo = this._getDOMRTInfo(node.attributes);

			// Serialize the start token
			this._serializeToken(state, new TagTk(name, tkAttribs, tkRTInfo));

			// then children
			for ( var i = 0, l = children.length; i < l; i++ ) {
				this._serializeDOM( children[i], state );
			}

			// then the end token
			this._serializeToken(state, new EndTagTk(name, tkAttribs, tkRTInfo));

			break;
		case Node.TEXT_NODE:
			this._serializeToken( state, node.data );
			break;
		case Node.COMMENT_NODE:
			// delay the newline creation until after the comment
			var savedEmitNewlineOnNextToken = state.emitNewlineOnNextToken;
			state.emitNewlineOnNextToken = false;
			this._serializeToken( state, new CommentTk( node.data ) );
			state.emitNewlineOnNextToken = savedEmitNewlineOnNextToken;
			break;
		default:
			console.warn( "Unhandled node type: " +
					node.outerHTML );
			break;
	}
};

WSP._getDOMAttribs = function( attribs ) {
	// convert to a list of key-value pairs
	var out = [];
	for ( var i = 0, l = attribs.length; i < l; i++ ) {
		var attrib = attribs.item(i);
		if ( attrib.name !== 'data-parsoid' ) {
			out.push( { k: attrib.name, v: attrib.value } );
		}
	}
	return out;
};

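// data-parsoid carries the round-trip information the parser recorded for
// each element (which is why it is excluded from the regular attribute list
// above); it is decoded here and passed as the third constructor argument to
// the tag tokens created in _serializeDOM.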
WSP._getDOMRTInfo = function( attribs ) {
	if ( attribs['data-parsoid'] ) {
		return JSON.parse( attribs['data-parsoid'].value || '{}' );
	} else {
		return {};
	}
};


// Quick HACK: define Node constants locally
// https://developer.mozilla.org/en/nodeType
var Node = {
	ELEMENT_NODE: 1,
	ATTRIBUTE_NODE: 2,
	TEXT_NODE: 3,
	CDATA_SECTION_NODE: 4,
	ENTITY_REFERENCE_NODE: 5,
	ENTITY_NODE: 6,
	PROCESSING_INSTRUCTION_NODE: 7,
	COMMENT_NODE: 8,
	DOCUMENT_NODE: 9,
	DOCUMENT_TYPE_NODE: 10,
	DOCUMENT_FRAGMENT_NODE: 11,
	NOTATION_NODE: 12
};


if (typeof module == "object") {
	module.exports.WikitextSerializer = WikitextSerializer;
}