Added documentation + minor code refactoring

* Renamed defaultOptions to initialState
* Got rid of unused state property
* Added comments explaining how state attributes
  and tag handler flags are used
* Refactored the listItemHandler check into helper functions and
  added a FIXME describing a possible rewrite of that check.
* Protected serializeDOM in a try-catch handler to
  catch exceptions and output the exception to the console.

Change-Id: I3d351c06e4b86baeb5a55243b11dbfa9baca5bb7
This commit is contained in:
Subramanya Sastry 2012-06-22 17:51:50 -05:00
parent 7a3d8fabdb
commit 5f584909e1

View file

@ -16,13 +16,42 @@ var PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer;
var WSP = WikitextSerializer.prototype; var WSP = WikitextSerializer.prototype;
WSP.defaultOptions = { /* *********************************************************************
onNewline: true, // actually following start of file or a real newline * Here is what the state attributes mean:
onStartOfLine : true, // in start-of-line context, not necessarily after newline *
* listStack
* Stack of list contexts to let us emit wikitext for nested lists.
* Each context keeps track of 3 values:
* - itemBullet: the wikitext bullet char for this list
* - itemCount : # of list items encountered so far for the list
* - bullets : cumulative bullet prefix based on all the lists
* that enclose the current list
*
* onNewline
* true on start of file or after a new line has been emitted.
*
* onStartOfLine
* true when onNewline is true, and also in other start-of-line contexts
* Ex: after a comment has been emitted, or after include/noinclude tokens.
*
* singleLineMode
* - if (> 0), we cannot emit any newlines.
* - this value changes as we enter/exit dom subtrees that require
* single-line wikitext output. WSP._tagHandlers specify single-line
* mode for individual tags.
*
* availableNewlineCount
* # of newlines that have been encountered so far but not emitted yet.
* Newlines are buffered till they need to be output. This lets us
* swallow newlines in contexts where they shouldn't be emitted for
* ensuring equivalent wikitext output. (ex dom: ..</li>\n\n</li>..)
* ********************************************************************* */
WSP.initialState = {
listStack: [], listStack: [],
lastHandler: null, onNewline: true,
availableNewlineCount: 0, // collected (and stripped) newlines from the input onStartOfLine : true,
singleLineMode: 0 // single-line syntactical context: list items, headings etc availableNewlineCount: 0,
singleLineMode: 0
}; };
WSP.escapeWikiText = function ( state, text ) { WSP.escapeWikiText = function ( state, text ) {
@ -208,29 +237,52 @@ WSP._listEndHandler = function( state, token ) {
}; };
WSP._listItemHandler = function ( handler, bullet, state, token ) { WSP._listItemHandler = function ( handler, bullet, state, token ) {
function inStartOfLineContext(state) {
return state.onStartOfLine ||
state.emitNewlineOnNextToken ||
(state.availableNewlineCount > 0);
}
function isRepeatToken(state, token) {
return state.prevToken.constructor === EndTagTk &&
state.prevToken.name === token.name;
}
function isMultiLineDtDdPair(state, token) {
return token.name === 'dd' &&
token.dataAttribs.stx !== 'row' &&
state.prevTagToken.constructor === EndTagTk &&
state.prevTagToken.name === 'dt';
}
var stack = state.listStack; var stack = state.listStack;
var curList = stack[stack.length - 1]; var curList = stack[stack.length - 1];
curList.itemCount++; curList.itemCount++;
var res;
curList.itemBullet = bullet; curList.itemBullet = bullet;
if (curList.itemCount > 1 && // don't prefix bullets on the first descent
// Check if the item is / will also be in start of line context, // Output bullet prefix only if:
// and prefix all bullets if so. // - this is not the first list item
// XXX gwicke: abstract out the 'will be on start of line if // - we are either in:
// output is not empty' bit to method or flag. // * a new line context,
( ( state.onStartOfLine || // * seeing an identical token as the last one (..</li><li>...)
state.availableNewlineCount || // (since we are in this handler on encountering a list item token,
state.emitNewlineOnNextToken || // this means we are the 2nd or later item in the list, BUT without
// separation between the same tokens would be triggered // any intervening new lines or other tokens in between)
( state.prevToken.constructor === EndTagTk && // * on the dd part of a multi-line dt-dd pair
state.prevToken.name === token.name) ) || // (The dd on a single-line dt-dd pair sticks to the dt.
// insert a newline before the dd unless specified differently on // which means it won't get the bullets that the dt already got).
// the prevTagToken //
( token.name === 'dd' && token.dataAttribs.stx !== 'row' && // SSS FIXME: This condition could be rephrased as:
state.prevTagToken.constructor === EndTagTk && //
state.prevTagToken.name === 'dt' // if (isRepeatToken(state, token) ||
) // (curList.itemCount > 1 && (inStartOfLineContext(state) || isMultiLineDtDdPair(state, token))))
) //
var res;
if (curList.itemCount > 1 &&
( inStartOfLineContext(state) ||
isRepeatToken(state, token) ||
isMultiLineDtDdPair(state, token)
)
) )
{ {
handler.startsNewline = true; handler.startsNewline = true;
@ -373,6 +425,33 @@ WSP._linkEndHandler = function( state, token ) {
} }
}; };
/* *********************************************************************
* startsNewline
* if true, the wikitext for the dom subtree rooted
* at this html tag requires a new line context.
*
* endsLine
* if true, the wikitext for the dom subtree rooted
* at this html tag ends the line.
*
* pairsSepNlCount
* # of new lines required between wikitext for dom siblings
* of the same tag type (..</p><p>.., etc.)
*
* newlineTransparent
* if true, this token does not change the newline status
* after it is emitted.
*
* singleLine
* if 1, the wikitext for the dom subtree rooted at this html tag
* requires all content to be emitted on the same line without
* any line breaks. +1 sets the single-line mode (on descending
* the dom subtree), -1 clears the single-line mode (on exiting
* the dom subtree).
*
* ignore
* if true, the serializer pretends as if it never saw this token.
* ********************************************************************* */
WSP.tagHandlers = { WSP.tagHandlers = {
body: { body: {
start: { start: {
@ -662,7 +741,7 @@ WSP._serializeAttributes = function ( attribs ) {
* Serialize a chunk of tokens * Serialize a chunk of tokens
*/ */
WSP.serializeTokens = function( tokens, chunkCB ) { WSP.serializeTokens = function( tokens, chunkCB ) {
var state = $.extend({}, this.defaultOptions, this.options), var state = $.extend({}, this.initialState, this.options),
i, l; i, l;
if ( chunkCB === undefined ) { if ( chunkCB === undefined ) {
var out = []; var out = [];
@ -879,18 +958,22 @@ WSP._serializeToken = function ( state, token ) {
* Serialize an HTML DOM document. * Serialize an HTML DOM document.
*/ */
WSP.serializeDOM = function( node, chunkCB ) { WSP.serializeDOM = function( node, chunkCB ) {
var state = $.extend({}, this.defaultOptions, this.options); try {
//console.warn( node.innerHTML ); var state = $.extend({}, this.initialState, this.options);
if ( ! chunkCB ) { //console.warn( node.innerHTML );
var out = []; if ( ! chunkCB ) {
state.chunkCB = out.push.bind( out ); var out = [];
this._serializeDOM( node, state ); state.chunkCB = out.push.bind( out );
this._serializeToken( state, new EOFTk() ); this._serializeDOM( node, state );
return out.join(''); this._serializeToken( state, new EOFTk() );
} else { return out.join('');
state.chunkCB = chunkCB; } else {
this._serializeDOM( node, state ); state.chunkCB = chunkCB;
this._serializeToken( state, new EOFTk() ); this._serializeDOM( node, state );
this._serializeToken( state, new EOFTk() );
}
} catch (e) {
console.warn(e.stack);
} }
}; };