Added documentation + minor code refactoring

* Renamed defaultOptions to initialState
* Got rid of unused state property
* Added comments explaining how state attributes and tag handler flags are used
* Refactored listItemHandler check into functions and added a FIXME about a possible rewrite of that check
* Protected serializeDOM with a try-catch handler that catches exceptions and outputs them to the console

Change-Id: I3d351c06e4b86baeb5a55243b11dbfa9baca5bb7
2024-11-15 10:35:48 +00:00 · 2012-06-22 17:51:50 -05:00 · 2012-06-22 17:51:50 -05:00 · 5f584909e1
parent 7a3d8fabdb
commit 5f584909e1
1 changed files with 121 additions and 38 deletions
--- a/modules/parser/mediawiki.WikitextSerializer.js
+++ b/modules/parser/mediawiki.WikitextSerializer.js
@ -16,13 +16,42 @@ var PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer;
 var WSP = WikitextSerializer.prototype;
-WSP.defaultOptions = {
+/* *********************************************************************
-	onNewline: true, // actually following start of file or a real newline
+ * Here is what the state attributes mean:
-	onStartOfLine : true, // in start-of-line context, not necessarily after newline
+ *
 * listStack
 *    Stack of list contexts to let us emit wikitext for nested lists.
 *    Each context keeps track of 3 values:
 *    - itemBullet: the wikitext bullet char for this list
 *    - itemCount : # of list items encountered so far for the list 
 *    - bullets   : cumulative bullet prefix based on all the lists
 *                  that enclose the current list
 *
 * onNewline
 *    true on start of file or after a new line has been emitted.
 *
 * onStartOfLine
 *    true when onNewline is true, and also in other start-of-line contexts
 *    Ex: after a comment has been emitted, or after include/noinclude tokens.
 *
 * singleLineMode
 *    - if (> 0), we cannot emit any newlines.
 *    - this value changes as we enter/exit dom subtrees that require
 *      single-line wikitext output. WSP._tagHandlers specify single-line
 *      mode for individual tags.
 *
 * availableNewlineCount
 *    # of newlines that have been encountered so far but not emitted yet.
 *    Newlines are buffered till they need to be output.  This lets us
 *    swallow newlines in contexts where they shouldn't be emitted for
 *    ensuring equivalent wikitext output. (ex dom: ..</li>\n\n</li>..)
 * ********************************************************************* */
 WSP.initialState = {
 	listStack: [],
-	lastHandler: null,
+	onNewline: true,
-	availableNewlineCount: 0, // collected (and stripped) newlines from the input
+	onStartOfLine : true,
-	singleLineMode: 0 // single-line syntactical context: list items, headings etc
+	availableNewlineCount: 0,
 	singleLineMode: 0
 };
 WSP.escapeWikiText = function ( state, text ) {
@ -208,29 +237,52 @@ WSP._listEndHandler = function( state, token ) {
 };
 WSP._listItemHandler = function ( handler, bullet, state, token ) { 
 	function inStartOfLineContext(state) {
 		return	state.onStartOfLine || 
 				state.emitNewlineOnNextToken ||
 				(state.availableNewlineCount > 0);
 	}
 	function isRepeatToken(state, token) {
 		return	state.prevToken.constructor === EndTagTk && 
 				state.prevToken.name === token.name;
 	}
 	function isMultiLineDtDdPair(state, token) {
 		return	token.name === 'dd' && 
 				token.dataAttribs.stx !== 'row' &&
 				state.prevTagToken.constructor === EndTagTk &&
 				state.prevTagToken.name === 'dt';
 	}
 	var stack   = state.listStack;
 	var curList = stack[stack.length - 1];
 	curList.itemCount++;
 	var res;
 	curList.itemBullet = bullet;
-	if (curList.itemCount > 1 && // don't prefix bullets on the first descent
+
-			// Check if the item is / will also be in start of line context,
+	// Output bullet prefix only if:
-			// and prefix all bullets if so.
+	// - this is not the first list item
-			// XXX gwicke: abstract out the 'will be on start of line if
+	// - we are either in:
-			// output is not empty' bit to method or flag.
+	//    * a new line context, 
-		(	( state.onStartOfLine ||
+	//    * seeing an identical token as the last one (..</li><li>...)
-				state.availableNewlineCount ||
+	//      (since we are in this handler on encountering a list item token,
-				state.emitNewlineOnNextToken || 
+	//       this means we are the 2nd or later item in the list, BUT without
-				// separation between the same tokens would be triggered
+	//       any intervening new lines or other tokens in between)
-				( state.prevToken.constructor === EndTagTk && 
+	//    * on the dd part of a multi-line dt-dd pair
-					state.prevToken.name === token.name) ) || 
+	//      (The dd on a single-line dt-dd pair sticks to the dt,
-			// insert a newline before the dd unless specified differently on
+	//       which means it won't get the bullets that the dt already got).
-			// the prevTagToken
+	//
-			(   token.name === 'dd' && token.dataAttribs.stx !== 'row' &&
+	// SSS FIXME: This condition could be rephrased as:
-				state.prevTagToken.constructor === EndTagTk &&
+	//
-				state.prevTagToken.name === 'dt'
+	// if (isRepeatToken(state, token) ||
-			)
+	//     (curList.itemCount > 1 && (inStartOfLineContext(state) || isMultiLineDtDdPair(state, token))))
-		) 
+	//
 	var res;
 	if (curList.itemCount > 1 && 
 		(	inStartOfLineContext(state) ||
 			isRepeatToken(state, token) ||
 			isMultiLineDtDdPair(state, token)
 		)
 	)
 	{
 		handler.startsNewline = true;
@ -373,6 +425,33 @@ WSP._linkEndHandler = function( state, token ) {
 	}
 };
 /* *********************************************************************
 * startsNewline
 *     if true, the wikitext for the dom subtree rooted
 *     at this html tag requires a new line context.
 *
 * endsLine
 *     if true, the wikitext for the dom subtree rooted
 *     at this html tag ends the line.
 *
 * pairsSepNlCount
 *     # of new lines required between wikitext for dom siblings
 *     of the same tag type (..</p><p>.., etc.)
 *
 * newlineTransparent
 *     if true, this token does not change the newline status
 *     after it is emitted.
 *
 * singleLine
 *     if 1, the wikitext for the dom subtree rooted at this html tag
 *     requires all content to be emitted on the same line without 
 *     any line breaks. +1 sets the single-line mode (on descending
 *     the dom subtree), -1 clears the single-line mode (on exiting
 *     the dom subtree).
 *
 * ignore
 *     if true, the serializer pretends as if it never saw this token.
 * ********************************************************************* */
 WSP.tagHandlers = {
 	body: {
 		start: {
@ -662,7 +741,7 @@ WSP._serializeAttributes = function ( attribs ) {
 * Serialize a chunk of tokens
 */
 WSP.serializeTokens = function( tokens, chunkCB ) {
-	var state = $.extend({}, this.defaultOptions, this.options),
+	var state = $.extend({}, this.initialState, this.options),
 		i, l;
 	if ( chunkCB === undefined ) {
 		var out = [];
@ -879,18 +958,22 @@ WSP._serializeToken = function ( state, token ) {
 * Serialize an HTML DOM document.
 */
 WSP.serializeDOM = function( node, chunkCB ) {
-	var state = $.extend({}, this.defaultOptions, this.options);
+	try {
-	//console.warn( node.innerHTML );
+		var state = $.extend({}, this.initialState, this.options);
-	if ( ! chunkCB ) {
+		//console.warn( node.innerHTML );
-		var out = [];
+		if ( ! chunkCB ) {
-		state.chunkCB = out.push.bind( out );
+			var out = [];
-		this._serializeDOM( node, state );
+			state.chunkCB = out.push.bind( out );
-		this._serializeToken( state, new EOFTk() );
+			this._serializeDOM( node, state );
-		return out.join('');
+			this._serializeToken( state, new EOFTk() );
-	} else {
+			return out.join('');
-		state.chunkCB = chunkCB;
+		} else {
-		this._serializeDOM( node, state );
+			state.chunkCB = chunkCB;
-		this._serializeToken( state, new EOFTk() );
+			this._serializeDOM( node, state );
 			this._serializeToken( state, new EOFTk() );
 		}
 	} catch (e) {
 		console.warn(e.stack);
 	}
 };