mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-15 18:39:52 +00:00
e584e35ecb
Basically only prefix all bullets if the serialization output is going to be in start-of-line context. The test for that is currently inline, but should perhaps be factored out to a method or state flag instead. We could alternatively consider to return the start-of-line prefix and let it be used in _serializeToken in case we end up in start-of-line context. This patch also fixes a newline issue on input like this: :d1 ::: d3 Both the list and list item handlers now set the startsNewline flag dynamically depending on the context, so that we don't depend on the suppression of newlines from list syntax by the singleLineMode any more. There is still an extra newline inserted between list items in the following example: ;t1 :d1 ;;t2 ::d2 This looks like a bug in the produced DOM and not in the serializer, since the outer definition list is closed and re-opened between d1 and t2. Change-Id: I78e3a1ef34cf9159d5a1e86fb64c774ff111e71d
969 lines
25 KiB
JavaScript
969 lines
25 KiB
JavaScript
/**
|
|
* Serializes a chunk of tokens or an HTML DOM to MediaWiki's wikitext flavor.
|
|
*
|
|
* @class
|
|
* @constructor
|
|
* @param options {Object} List of options for serialization
|
|
*/
|
|
WikitextSerializer = function( options ) {
|
|
this.options = $.extend( {
|
|
// defaults
|
|
}, options || {} );
|
|
};
|
|
|
|
require('./core-upgrade.js');
|
|
var PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer;
|
|
|
|
var WSP = WikitextSerializer.prototype;
|
|
|
|
WSP.defaultOptions = {
|
|
onNewline: true, // actually following start of file or a real newline
|
|
onStartOfLine : true, // in start-of-line context, not necessarily after newline
|
|
listStack: [],
|
|
lastHandler: null,
|
|
availableNewlineCount: 0, // collected (and stripped) newlines from the input
|
|
singleLineMode: 0 // single-line syntactical context: list items, headings etc
|
|
};
|
|
|
|
WSP.escapeWikiText = function ( state, text ) {
|
|
// tokenize the text
|
|
var p = new PegTokenizer( state.env ),
|
|
tokens = [];
|
|
p.on('chunk', function ( chunk ) {
|
|
//console.warn( JSON.stringify(chunk));
|
|
tokens.push.apply( tokens, chunk );
|
|
});
|
|
p.on('end', function(){
|
|
//console.warn( JSON.stringify('end'));
|
|
});
|
|
// this is synchronous for now, will still need sync version later, or
|
|
// alternatively make text processing in the serializer async
|
|
var prefixedText = text;
|
|
if ( ! state.onNewline ) {
|
|
// Prefix '_' so that no start-of-line wiki syntax matches. Strip it from
|
|
// the result.
|
|
prefixedText = '_' + text;
|
|
}
|
|
|
|
if ( state.inIndentPre ) {
|
|
prefixedText = prefixedText.replace(/(\r?\n)/g, '$1_');
|
|
}
|
|
|
|
// FIXME: parse using
|
|
p.process( prefixedText );
|
|
|
|
|
|
if ( ! state.onNewline ) {
|
|
// now strip the leading underscore.
|
|
if ( tokens[0] === '_' ) {
|
|
tokens.shift();
|
|
} else {
|
|
tokens[0] = tokens[0].substr(1);
|
|
}
|
|
}
|
|
|
|
// state.inIndentPre is handled on the complete output
|
|
|
|
//
|
|
// wrap any run of non-text tokens into <nowiki> tags using the source
|
|
// offsets of top-level productions
|
|
// return the updated text
|
|
var outTexts = [],
|
|
nonTextTokenAccum = [],
|
|
cursor = 0;
|
|
function wrapNonTextTokens () {
|
|
if ( nonTextTokenAccum.length ) {
|
|
var missingRangeEnd = false;
|
|
// TODO: make sure the source positions are always set!
|
|
// The start range
|
|
var startRange = nonTextTokenAccum[0].dataAttribs.tsr,
|
|
rangeStart, rangeEnd;
|
|
if ( ! startRange ) {
|
|
console.warn( 'No tsr on ' + nonTextTokenAccum[0] );
|
|
rangeStart = cursor;
|
|
} else {
|
|
rangeStart = startRange[0];
|
|
if ( ! state.onNewline ) {
|
|
// compensate for underscore.
|
|
rangeStart--;
|
|
}
|
|
cursor = rangeStart;
|
|
}
|
|
|
|
var endRange = nonTextTokenAccum.last().dataAttribs.tsr;
|
|
if ( ! endRange ) {
|
|
// FIXME: improve this!
|
|
//rangeEnd = state.env.tokensToString( tokens ).length;
|
|
// Be conservative and extend the range to the end for now.
|
|
// Alternatives: only extend it to the next token with range
|
|
// info on it.
|
|
missingRangeEnd = true;
|
|
rangeEnd = text.length;
|
|
} else {
|
|
rangeEnd = endRange[1];
|
|
if ( ! state.onNewline ) {
|
|
// compensate for underscore.
|
|
rangeEnd--;
|
|
}
|
|
}
|
|
|
|
var escapedSource = text.substr( rangeStart, rangeEnd - rangeStart )
|
|
.replace( /<(\/?nowiki)>/g, '<$1>' );
|
|
outTexts.push( '<nowiki>' );
|
|
outTexts.push( escapedSource );
|
|
outTexts.push( '</nowiki>' );
|
|
cursor += 17 + escapedSource.length;
|
|
if ( missingRangeEnd ) {
|
|
throw 'No tsr on end token: ' + nonTextTokenAccum.last();
|
|
}
|
|
nonTextTokenAccum = [];
|
|
}
|
|
}
|
|
try {
|
|
for ( var i = 0, l = tokens.length; i < l; i++ ) {
|
|
var token = tokens[i];
|
|
switch ( token.constructor ) {
|
|
case String:
|
|
wrapNonTextTokens();
|
|
outTexts.push( token );
|
|
cursor += token.length;
|
|
break;
|
|
case NlTk:
|
|
wrapNonTextTokens();
|
|
outTexts.push( '\n' );
|
|
cursor++;
|
|
break;
|
|
case EOFTk:
|
|
wrapNonTextTokens();
|
|
break;
|
|
default:
|
|
//console.warn('pushing ' + token);
|
|
nonTextTokenAccum.push(token);
|
|
break;
|
|
}
|
|
}
|
|
} catch ( e ) {
|
|
console.warn( e );
|
|
}
|
|
//console.warn( 'escaped wikiText: ' + outTexts.join('') );
|
|
var res = outTexts.join('');
|
|
if ( state.inIndentPre ) {
|
|
return res.replace(/\n_/g, '\n');
|
|
} else {
|
|
return res;
|
|
}
|
|
};
|
|
|
|
var id = function(v) {
|
|
return function( state ) {
|
|
return v;
|
|
};
|
|
};
|
|
|
|
WSP._listHandler = function( handler, bullet, state, token ) {
|
|
function isListItem(token) {
|
|
if (token.constructor !== TagTk) return false;
|
|
|
|
var tokenName = token.name;
|
|
return (tokenName === 'li' || tokenName === 'dt' || tokenName === 'dd');
|
|
}
|
|
if ( state.singleLineMode ) {
|
|
state.singleLineMode--;
|
|
}
|
|
|
|
var bullets, res;
|
|
var stack = state.listStack;
|
|
if (stack.length === 0) {
|
|
bullets = bullet;
|
|
res = bullets;
|
|
handler.startsNewline = true;
|
|
} else {
|
|
var curList = stack[stack.length - 1];
|
|
//console.warn(JSON.stringify( stack ));
|
|
bullets = curList.bullets + curList.itemBullet + bullet;
|
|
curList.itemCount++;
|
|
if ( // deeply nested list
|
|
curList.itemCount > 2 ||
|
|
// A nested list, not directly after a list item
|
|
(curList.itemCount > 1 && !isListItem(state.prevToken))) {
|
|
res = bullets;
|
|
handler.startsNewline = true;
|
|
} else {
|
|
res = bullet;
|
|
handler.startsNewline = false;
|
|
}
|
|
}
|
|
stack.push({ itemCount: 0, bullets: bullets, itemBullet: ''});
|
|
state.env.dp('lh res', bullets, res, handler );
|
|
return res;
|
|
};
|
|
|
|
WSP._listEndHandler = function( state, token ) {
|
|
state.listStack.pop();
|
|
return '';
|
|
};
|
|
|
|
WSP._listItemHandler = function ( handler, bullet, state, token ) {
|
|
var stack = state.listStack;
|
|
var curList = stack[stack.length - 1];
|
|
curList.itemCount++;
|
|
var res;
|
|
curList.itemBullet = bullet;
|
|
if (curList.itemCount > 1 && // don't prefix bullets on the first descent
|
|
// Check if the item is / will also be in start of line context,
|
|
// and prefix all bullets if so.
|
|
// XXX gwicke: abstract out the 'will be on start of line if
|
|
// output is not empty' bit to method or flag.
|
|
( state.onStartOfLine ||
|
|
state.availableNewlineCount ||
|
|
state.emitNewlineOnNextToken ||
|
|
// separation between the same tokens would be triggered
|
|
( state.prevToken.constructor === EndTagTk &&
|
|
state.prevToken.name === token.name) ) )
|
|
{
|
|
handler.startsNewline = true;
|
|
res = curList.bullets + bullet;
|
|
} else {
|
|
handler.startsNewline = false;
|
|
res = bullet;
|
|
}
|
|
state.env.dp( 'lih', token, res, handler );
|
|
return res;
|
|
};
|
|
|
|
WSP._serializeTableTag = function ( symbol, optionEndSymbol, state, token ) {
|
|
if ( token.attribs.length ) {
|
|
return symbol + ' ' + WSP._serializeAttributes( token.attribs ) + optionEndSymbol;
|
|
} else {
|
|
return symbol;
|
|
}
|
|
};
|
|
|
|
WSP._emptyTags = { br: true, meta: true };
|
|
|
|
WSP._serializeHTMLTag = function ( state, token ) {
|
|
var close = '';
|
|
if ( WSP._emptyTags[ token.name ] ) {
|
|
close = '/';
|
|
}
|
|
|
|
if ( token.name === 'pre' ) {
|
|
// html-syntax pre is very similar to nowiki
|
|
state.inHTMLPre = true;
|
|
}
|
|
|
|
// Swallow required newline from previous token on encountering a HTML tag
|
|
//state.emitNewlineOnNextToken = false;
|
|
|
|
if ( token.attribs.length ) {
|
|
return '<' + token.name + ' ' +
|
|
WSP._serializeAttributes( token.attribs ) + close + '>';
|
|
} else {
|
|
return '<' + token.name + close + '>';
|
|
}
|
|
};
|
|
|
|
WSP._serializeHTMLEndTag = function ( state, token ) {
|
|
if ( token.name === 'pre' ) {
|
|
state.inHTMLPre = false;
|
|
}
|
|
if ( ! WSP._emptyTags[ token.name ] ) {
|
|
return '</' + token.name + '>';
|
|
} else {
|
|
return '';
|
|
}
|
|
};
|
|
|
|
WSP._linkHandler = function( state, token ) {
|
|
//return '[[';
|
|
// TODO: handle internal/external links etc using RDFa and dataAttribs
|
|
// Also convert unannotated html links to external wiki links for html
|
|
// import. Might want to consider converting relative links without path
|
|
// component and file extension to wiki links.
|
|
|
|
var env = state.env;
|
|
var attribDict = env.KVtoHash( token.attribs );
|
|
if ( attribDict.rel && attribDict.href !== undefined ) {
|
|
var tokenData = token.dataAttribs;
|
|
if ( attribDict.rel === 'mw:wikiLink' ) {
|
|
var base = env.wgScriptPath;
|
|
var href = attribDict.href;
|
|
var prefix = href.substr(0, base.length);
|
|
var target = (prefix === base) ? href.substr(base.length) : href;
|
|
target = decodeURIComponent(target);
|
|
|
|
var tail = tokenData.tail;
|
|
if ( tail && tail.length ) {
|
|
state.dropTail = tail;
|
|
target = tokenData.gc ? tokenData.sHref : target.replace( /_/g, ' ' );
|
|
} else {
|
|
var origLinkTgt = tokenData.sHref;
|
|
if (origLinkTgt) {
|
|
// Normalize the source target so that we can compare it
|
|
// with href.
|
|
var normalizedOrigLinkTgt = env.normalizeTitle( env.tokensToString(origLinkTgt) );
|
|
if ( normalizedOrigLinkTgt === target ) {
|
|
// Non-standard capitalization
|
|
target = origLinkTgt;
|
|
}
|
|
} else {
|
|
target = target.replace( /_/g, ' ' );
|
|
}
|
|
}
|
|
|
|
// FIXME: Properly handle something like [[{{Foo}}]]s
|
|
target = env.tokensToString( target );
|
|
|
|
if ( tokenData.gc ) {
|
|
state.dropContent = true;
|
|
return '[[' + target;
|
|
} else {
|
|
return '[[' + target + '|';
|
|
}
|
|
} else if ( attribDict.rel === 'mw:extLink' ) {
|
|
if ( tokenData.stx === 'urllink' ) {
|
|
state.dropContent = true;
|
|
return attribDict.href;
|
|
} else if ( tokenData.gc ) {
|
|
state.dropContent = true;
|
|
return '[' + attribDict.href;
|
|
} else {
|
|
return '[' + attribDict.href + ' ';
|
|
}
|
|
} else {
|
|
return WSP._serializeHTMLTag( state, token );
|
|
}
|
|
} else {
|
|
return WSP._serializeHTMLTag( state, token );
|
|
}
|
|
|
|
//if ( rtinfo.type === 'wikilink' ) {
|
|
// return '[[' + rtinfo.target + ']]';
|
|
//} else {
|
|
// // external link
|
|
// return '[' + rtinfo.
|
|
};
|
|
WSP._linkEndHandler = function( state, token ) {
|
|
var attribDict = state.env.KVtoHash( token.attribs );
|
|
if ( attribDict.rel && attribDict.href !== undefined ) {
|
|
if ( attribDict.rel === 'mw:wikiLink' ) {
|
|
state.dropContent = false;
|
|
state.dropTail = false;
|
|
return "]]" + (token.dataAttribs.tail ? token.dataAttribs.tail : "");
|
|
} else if ( attribDict.rel === 'mw:extLink' ) {
|
|
state.dropContent = false;
|
|
return (token.dataAttribs.stx === 'urllink') ? '' : ']';
|
|
} else {
|
|
return WSP._serializeHTMLEndTag( state, token );
|
|
}
|
|
} else {
|
|
return WSP._serializeHTMLEndTag( state, token );
|
|
}
|
|
};
|
|
|
|
WSP.tagHandlers = {
|
|
body: {
|
|
start: {
|
|
handle: function(state, token) {
|
|
// swallow trailing new line
|
|
state.emitNewlineOnNextToken = false;
|
|
return '';
|
|
}
|
|
}
|
|
},
|
|
ul: {
|
|
start: {
|
|
startsNewline : true,
|
|
handle: function ( state, token ) {
|
|
return WSP._listHandler( this, '*', state, token );
|
|
},
|
|
pairSepNLCount: 2,
|
|
newlineTransparent: true
|
|
},
|
|
end: {
|
|
endsLine: true,
|
|
handle: WSP._listEndHandler
|
|
}
|
|
},
|
|
ol: {
|
|
start: {
|
|
startsNewline : true,
|
|
handle: function ( state, token ) {
|
|
return WSP._listHandler( this, '#', state, token );
|
|
},
|
|
pairSepNLCount: 2,
|
|
newlineTransparent: true
|
|
},
|
|
end: {
|
|
endsLine : true,
|
|
handle: WSP._listEndHandler
|
|
}
|
|
},
|
|
dl: {
|
|
start: {
|
|
startsNewline : true,
|
|
handle: function ( state, token ) {
|
|
return WSP._listHandler( this, '', state, token );
|
|
},
|
|
pairSepNLCount: 2
|
|
},
|
|
end: {
|
|
endsLine: true,
|
|
handle: WSP._listEndHandler
|
|
}
|
|
},
|
|
li: {
|
|
start: {
|
|
handle: function ( state, token ) {
|
|
return WSP._listItemHandler( this, '', state, token );
|
|
},
|
|
singleLine: 1,
|
|
pairSepNLCount: 1
|
|
},
|
|
end: {
|
|
singleLine: -1
|
|
}
|
|
},
|
|
// XXX: handle single-line vs. multi-line dls etc
|
|
dt: {
|
|
start: {
|
|
singleLine: 1,
|
|
handle: function ( state, token ) {
|
|
return WSP._listItemHandler( this, ';', state, token );
|
|
},
|
|
pairSepNLCount: 1,
|
|
newlineTransparent: true
|
|
},
|
|
end: {
|
|
singleLine: -1
|
|
}
|
|
},
|
|
dd: {
|
|
start: {
|
|
singleLine: 1,
|
|
handle: function ( state, token ) {
|
|
return WSP._listItemHandler( this, ':', state, token );
|
|
},
|
|
pairSepNLCount: 1,
|
|
newlineTransparent: true
|
|
},
|
|
end: {
|
|
endsLine: true,
|
|
singleLine: -1
|
|
}
|
|
},
|
|
// XXX: handle options
|
|
table: {
|
|
start: {
|
|
handle: WSP._serializeTableTag.bind(null, "{|", '')
|
|
},
|
|
end: {
|
|
handle: function(state, token) {
|
|
if ( state.prevTagToken && state.prevTagToken.name === 'tr' ) {
|
|
this.startsNewline = true;
|
|
} else {
|
|
this.startsNewline = false;
|
|
}
|
|
return "|}";
|
|
}
|
|
}
|
|
},
|
|
tbody: { start: { ignore: true }, end: { ignore: true } },
|
|
th: {
|
|
start: {
|
|
handle: function ( state, token ) {
|
|
if ( token.dataAttribs.stx_v === 'row' ) {
|
|
this.startsNewline = false;
|
|
return WSP._serializeTableTag("!!", ' |', state, token);
|
|
} else {
|
|
this.startsNewline = true;
|
|
return WSP._serializeTableTag( "!", ' |', state, token);
|
|
}
|
|
}
|
|
}
|
|
},
|
|
tr: {
|
|
start: {
|
|
handle: function ( state, token ) {
|
|
if ( state.prevToken.constructor === TagTk && state.prevToken.name === 'tbody' ) {
|
|
// Omit for first row in a table. XXX: support optional trs
|
|
// for first line (in source wikitext) too using some flag in
|
|
// data-mw (stx: 'wikitext' ?)
|
|
return '';
|
|
} else {
|
|
return WSP._serializeTableTag("|-", '', state, token );
|
|
}
|
|
},
|
|
startsNewline: true
|
|
}
|
|
},
|
|
td: {
|
|
start: {
|
|
handle: function ( state, token ) {
|
|
if ( token.dataAttribs.stx_v === 'row' ) {
|
|
this.startsNewline = false;
|
|
return WSP._serializeTableTag("||", ' |', state, token);
|
|
} else {
|
|
this.startsNewline = true;
|
|
return WSP._serializeTableTag("|", ' |', state, token);
|
|
}
|
|
}
|
|
}
|
|
},
|
|
caption: {
|
|
start: {
|
|
startsNewline: true,
|
|
handle: WSP._serializeTableTag.bind(null, "|+", ' |')
|
|
}
|
|
},
|
|
p: {
|
|
make: function(state, token) {
|
|
// Special case handling in a list context
|
|
// VE embeds list content in paragraph tags
|
|
return state.singleLineMode ? WSP.defaultHTMLTagHandler : this;
|
|
},
|
|
start: {
|
|
startsNewline : true,
|
|
pairSepNLCount: 2
|
|
},
|
|
end: {
|
|
endsLine: true
|
|
}
|
|
},
|
|
// XXX: support indent variant instead by registering a newline handler?
|
|
pre: {
|
|
start: {
|
|
startsNewline: true,
|
|
handle: function( state, token ) {
|
|
state.inIndentPre = true;
|
|
state.textHandler = function( t ) {
|
|
return t.replace(/\n/g, '\n ' );
|
|
};
|
|
return ' ';
|
|
}
|
|
},
|
|
end: {
|
|
endsLine: true,
|
|
handle: function( state, token) {
|
|
state.inIndentPre = false;
|
|
state.textHandler = null;
|
|
return '';
|
|
}
|
|
}
|
|
},
|
|
meta: {
|
|
start: {
|
|
handle: function ( state, token ) {
|
|
var argDict = state.env.KVtoHash( token.attribs );
|
|
if ( argDict['typeof'] === 'mw:tag' ) {
|
|
// we use this currently for nowiki and noinclude & co
|
|
this.newlineTransparent = true;
|
|
if ( argDict.content === 'nowiki' ) {
|
|
state.inNoWiki = true;
|
|
} else if ( argDict.content === '/nowiki' ) {
|
|
state.inNoWiki = false;
|
|
} else {
|
|
console.warn( JSON.stringify( argDict ) );
|
|
}
|
|
return '<' + argDict.content + '>';
|
|
} else {
|
|
this.newlineTransparent = false;
|
|
return WSP._serializeHTMLTag( state, token );
|
|
}
|
|
}
|
|
}
|
|
},
|
|
hr: {
|
|
start: {
|
|
startsNewline: true,
|
|
endsLine: true,
|
|
handle: id("----")
|
|
}
|
|
},
|
|
h1: {
|
|
start: { startsNewline: true, handle: id("=") },
|
|
end: { endsLine: true, handle: id("=") }
|
|
},
|
|
h2: {
|
|
start: { startsNewline: true, handle: id("==") },
|
|
end: { endsLine: true, handle: id("==") }
|
|
},
|
|
h3: {
|
|
start: { startsNewline: true, handle: id("===") },
|
|
end: { endsLine: true, handle: id("===") }
|
|
},
|
|
h4: {
|
|
start: { startsNewline: true, handle: id("====") },
|
|
end: { endsLine: true, handle: id("====") }
|
|
},
|
|
h5: {
|
|
start: { startsNewline: true, handle: id("=====") },
|
|
end: { endsLine: true, handle: id("=====") }
|
|
},
|
|
h6: {
|
|
start: { startsNewline: true, handle: id("======") },
|
|
end: { endsLine: true, handle: id("======") }
|
|
},
|
|
br: {
|
|
start: {
|
|
startsNewline: true,
|
|
endsLine: true,
|
|
handle: id("")
|
|
}
|
|
},
|
|
b: {
|
|
start: { handle: id("'''") },
|
|
end: { handle: id("'''") }
|
|
},
|
|
i: {
|
|
start: { handle: id("''") },
|
|
end: { handle: id("''") }
|
|
},
|
|
a: {
|
|
start: { handle: WSP._linkHandler },
|
|
end: { handle: WSP._linkEndHandler }
|
|
}
|
|
};
|
|
|
|
|
|
WSP._serializeAttributes = function ( attribs ) {
|
|
var out = [];
|
|
for ( var i = 0, l = attribs.length; i < l; i++ ) {
|
|
var kv = attribs[i];
|
|
if (kv.k.length) {
|
|
if ( kv.v.length ) {
|
|
out.push( kv.k + '=' +
|
|
'"' + kv.v.replace( '"', '"' ) + '"');
|
|
} else {
|
|
out.push( kv.k );
|
|
}
|
|
} else if ( kv.v.length ) {
|
|
// not very likely..
|
|
out.push( kv.v );
|
|
}
|
|
}
|
|
// XXX: round-trip optional whitespace / line breaks etc
|
|
return out.join(' ');
|
|
};
|
|
|
|
/**
|
|
* Serialize a chunk of tokens
|
|
*/
|
|
WSP.serializeTokens = function( tokens, chunkCB ) {
|
|
var state = $.extend({}, this.defaultOptions, this.options),
|
|
i, l;
|
|
if ( chunkCB === undefined ) {
|
|
var out = [];
|
|
state.chunkCB = out.push.bind(out);
|
|
for ( i = 0, l = tokens.length; i < l; i++ ) {
|
|
this._serializeToken( state, tokens[i] );
|
|
}
|
|
return out;
|
|
} else {
|
|
state.chunkCB = chunkCB;
|
|
for ( i = 0, l = tokens.length; i < l; i++ ) {
|
|
this._serializeToken( state, tokens[i] );
|
|
}
|
|
}
|
|
};
|
|
|
|
WSP.defaultHTMLTagHandler = {
|
|
start: { handle: WSP._serializeHTMLTag },
|
|
end : { handle: WSP._serializeHTMLEndTag }
|
|
};
|
|
|
|
WSP._getTokenHandler = function(state, token) {
|
|
var handler;
|
|
if (token.dataAttribs.stx === 'html') {
|
|
handler = this.defaultHTMLTagHandler;
|
|
} else {
|
|
var tname = token.name;
|
|
handler = this.tagHandlers[tname];
|
|
if ( handler && handler.make ) {
|
|
handler = handler.make(state, token);
|
|
}
|
|
}
|
|
|
|
if ( ! handler ) {
|
|
handler = this.defaultHTMLTagHandler;
|
|
}
|
|
if ( token.constructor === TagTk || token.constructor === SelfclosingTagTk ) {
|
|
return handler.start || {};
|
|
} else {
|
|
return handler.end || {};
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Serialize a token.
|
|
*/
|
|
WSP._serializeToken = function ( state, token ) {
|
|
var handler = {},
|
|
res = '',
|
|
dropContent = state.dropContent;
|
|
|
|
state.prevToken = state.curToken;
|
|
state.curToken = token;
|
|
|
|
|
|
switch( token.constructor ) {
|
|
case TagTk:
|
|
case SelfclosingTagTk:
|
|
handler = WSP._getTokenHandler( state, token );
|
|
if ( ! handler.ignore ) {
|
|
state.prevTagToken = state.currTagToken;
|
|
state.currTagToken = token;
|
|
res = handler.handle ? handler.handle( state, token ) : '';
|
|
}
|
|
break;
|
|
case EndTagTk:
|
|
handler = WSP._getTokenHandler( state, token );
|
|
if ( ! handler.ignore ) {
|
|
state.prevTagToken = state.currTagToken;
|
|
state.currTagToken = token;
|
|
if ( handler.singleLine < 0 && state.singleLineMode ) {
|
|
state.singleLineMode--;
|
|
}
|
|
res = handler.handle ? handler.handle( state, token ) : '';
|
|
}
|
|
break;
|
|
case String:
|
|
res = ( state.inNoWiki || state.inHTMLPre ) ? token
|
|
: this.escapeWikiText( state, token );
|
|
res = state.textHandler ? state.textHandler( res ) : res;
|
|
break;
|
|
case CommentTk:
|
|
res = '<!--' + token.value + '-->';
|
|
// don't consider comments for changes of the onStartOfLine status
|
|
// XXX: convert all non-tag handlers to a similar handler
|
|
// structure as tags?
|
|
handler = { newlineTransparent: true };
|
|
break;
|
|
case NlTk:
|
|
res = '\n';
|
|
res = state.textHandler ? state.textHandler( res ) : res;
|
|
break;
|
|
case EOFTk:
|
|
res = '';
|
|
for ( var i = 0, l = state.availableNewlineCount; i < l; i++ ) {
|
|
res += '\n';
|
|
}
|
|
state.chunkCB(res);
|
|
break;
|
|
default:
|
|
res = '';
|
|
console.warn( 'Unhandled token type ' + JSON.stringify( token ) );
|
|
break;
|
|
}
|
|
|
|
|
|
if (! dropContent || ! state.dropContent ) {
|
|
|
|
var newNLCount = 0;
|
|
if (res !== '') {
|
|
// Strip leading or trailing newlines from the returned string
|
|
var match = res.match( /^((?:\r?\n)*)((?:.*?|[\r\n]+[^\r\n])*?)((?:\r?\n)*)$/ ),
|
|
leadingNLs = match[1],
|
|
trailingNLs = match[3];
|
|
|
|
if (leadingNLs === res) {
|
|
// all newlines, accumulate count, and clear output
|
|
state.availableNewlineCount += leadingNLs.replace(/\r\n/g, '\n').length;
|
|
res = "";
|
|
} else {
|
|
newNLCount = trailingNLs.replace(/\r\n/g, '\n').length;
|
|
if ( leadingNLs !== '' ) {
|
|
state.availableNewlineCount += leadingNLs.replace(/\r\n/g, '\n').length;
|
|
}
|
|
// strip newlines
|
|
res = match[2];
|
|
}
|
|
}
|
|
|
|
// Check if we have a pair of identical tag tokens </p><p>; </ul><ul>; etc.
|
|
// that have to be separated by extra newlines and add those in.
|
|
if (handler.pairSepNLCount && state.prevTagToken &&
|
|
state.prevTagToken.constructor === EndTagTk &&
|
|
state.prevTagToken.name === token.name )
|
|
{
|
|
if ( state.availableNewlineCount < handler.pairSepNLCount) {
|
|
state.availableNewlineCount = handler.pairSepNLCount;
|
|
}
|
|
}
|
|
|
|
if ( state.env.debug ) {
|
|
console.warn(token + " -> " + JSON.stringify( res ) +
|
|
"\n onnl: " + state.onNewline +
|
|
", #nl: is" + state.availableNewlineCount + '/new' + newNLCount +
|
|
', emitOnNext:' + state.emitNewlineOnNextToken);
|
|
}
|
|
if (res !== '' ) {
|
|
var out = '';
|
|
// Prev token's new line token
|
|
if ( !state.singleLineMode &&
|
|
( ( !res.match(/^\s*$/) && state.emitNewlineOnNextToken ) ||
|
|
( handler.startsNewline && !state.onStartOfLine ) ) )
|
|
{
|
|
// Emit new line, if necessary
|
|
if ( ! state.availableNewlineCount ) {
|
|
state.availableNewlineCount++;
|
|
}
|
|
state.emitNewlineOnNextToken = false;
|
|
}
|
|
|
|
if ( state.availableNewlineCount ) {
|
|
state.onNewline = true;
|
|
state.onStartOfLine = true;
|
|
}
|
|
|
|
// Add required # of new lines in the beginning
|
|
for (; state.availableNewlineCount; state.availableNewlineCount--) {
|
|
out += '\n';
|
|
}
|
|
|
|
state.availableNewlineCount = newNLCount;
|
|
|
|
// FIXME: This might modify not just the last content token in a
|
|
// link, which would be wrong. We'll likely have to collect tokens
|
|
// between a tags instead, and strip only the last content token.
|
|
if (state.dropTail && res.substr(- state.dropTail.length) === state.dropTail) {
|
|
res = res.substr(0, res.length - state.dropTail.length);
|
|
}
|
|
|
|
if ( state.singleLineMode ) {
|
|
res = res.replace(/\n/g, ' ');
|
|
}
|
|
out += res;
|
|
if ( res !== '' ) {
|
|
state.onNewline = false;
|
|
if ( !handler.newlineTransparent ) {
|
|
state.onStartOfLine = false;
|
|
}
|
|
}
|
|
state.env.dp(' =>', out);
|
|
state.chunkCB( out );
|
|
} else {
|
|
state.availableNewlineCount += newNLCount;
|
|
if ( handler.startsNewline && ! state.onStartOfLine ) {
|
|
state.emitNewlineOnNextToken = true;
|
|
}
|
|
}
|
|
/* else {
|
|
console.warn("SILENT: tok: " + token + ", res: <" + res + ">" + ", onnl: " + state.onNewline + ", # nls: " + state.availableNewlineCount);
|
|
}
|
|
*/
|
|
|
|
if (handler.endsLine) {
|
|
// Record end of line
|
|
state.emitNewlineOnNextToken = true;
|
|
}
|
|
if ( handler.singleLine > 0 ) {
|
|
state.singleLineMode += handler.singleLine;
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Serialize an HTML DOM document.
|
|
*/
|
|
WSP.serializeDOM = function( node, chunkCB ) {
|
|
var state = $.extend({}, this.defaultOptions, this.options);
|
|
//console.warn( node.innerHTML );
|
|
if ( ! chunkCB ) {
|
|
var out = [];
|
|
state.chunkCB = out.push.bind( out );
|
|
this._serializeDOM( node, state );
|
|
this._serializeToken( state, new EOFTk() );
|
|
return out.join('');
|
|
} else {
|
|
state.chunkCB = chunkCB;
|
|
this._serializeDOM( node, state );
|
|
this._serializeToken( state, new EOFTk() );
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Internal worker. Recursively serialize a DOM subtree by creating tokens and
|
|
* calling _serializeToken on each of these.
|
|
*/
|
|
WSP._serializeDOM = function( node, state ) {
|
|
// serialize this node
|
|
switch( node.nodeType ) {
|
|
case Node.ELEMENT_NODE:
|
|
//console.warn( node.nodeName.toLowerCase() );
|
|
var children = node.childNodes,
|
|
name = node.nodeName.toLowerCase(),
|
|
tkAttribs = this._getDOMAttribs(node.attributes),
|
|
tkRTInfo = this._getDOMRTInfo(node.attributes);
|
|
|
|
// Serialize the start token
|
|
this._serializeToken(state, new TagTk(name, tkAttribs, tkRTInfo));
|
|
|
|
// then children
|
|
for ( var i = 0, l = children.length; i < l; i++ ) {
|
|
this._serializeDOM( children[i], state );
|
|
}
|
|
|
|
// then the end token
|
|
this._serializeToken(state, new EndTagTk(name, tkAttribs, tkRTInfo));
|
|
break;
|
|
case Node.TEXT_NODE:
|
|
this._serializeToken( state, node.data );
|
|
break;
|
|
case Node.COMMENT_NODE:
|
|
// delay the newline creation until after the comment
|
|
var savedEmitNewlineOnNextToken = state.emitNewlineOnNextToken;
|
|
state.emitNewlineOnNextToken = false;
|
|
this._serializeToken( state, new CommentTk( node.data ) );
|
|
state.emitNewlineOnNextToken = savedEmitNewlineOnNextToken;
|
|
break;
|
|
default:
|
|
console.warn( "Unhandled node type: " +
|
|
node.outerHTML );
|
|
break;
|
|
}
|
|
};
|
|
|
|
WSP._getDOMAttribs = function( attribs ) {
|
|
// convert to list fo key-value pairs
|
|
var out = [];
|
|
for ( var i = 0, l = attribs.length; i < l; i++ ) {
|
|
var attrib = attribs.item(i);
|
|
if ( attrib.name !== 'data-mw' ) {
|
|
out.push( { k: attrib.name, v: attrib.value } );
|
|
}
|
|
}
|
|
return out;
|
|
};
|
|
|
|
WSP._getDOMRTInfo = function( attribs ) {
|
|
if ( attribs['data-mw'] ) {
|
|
return JSON.parse( attribs['data-mw'].value || '{}' );
|
|
} else {
|
|
return {};
|
|
}
|
|
};
|
|
|
|
|
|
// Quick HACK: define Node constants locally
|
|
// https://developer.mozilla.org/en/nodeType
|
|
var Node = {
|
|
ELEMENT_NODE: 1,
|
|
ATTRIBUTE_NODE: 2,
|
|
TEXT_NODE: 3,
|
|
CDATA_SECTION_NODE: 4,
|
|
ENTITY_REFERENCE_NODE: 5,
|
|
ENTITY_NODE: 6,
|
|
PROCESSING_INSTRUCTION_NODE: 7,
|
|
COMMENT_NODE: 8,
|
|
DOCUMENT_NODE: 9,
|
|
DOCUMENT_TYPE_NODE: 10,
|
|
DOCUMENT_FRAGMENT_NODE: 11,
|
|
NOTATION_NODE: 12
|
|
};
|
|
|
|
|
|
if (typeof module == "object") {
|
|
module.exports.WikitextSerializer = WikitextSerializer;
|
|
}
|