From 5f584909e131de1606d812a8256f9ab3c00fec64 Mon Sep 17 00:00:00 2001
From: Subramanya Sastry <ssastry@wikimedia.org>
Date: Fri, 22 Jun 2012 17:51:50 -0500
Subject: [PATCH] Added documentation + minor code refactoring

* Renamed defaultOptions to initialState
* Got rid of unused state property
* Added comments explaining how state attributes
  and tag handler flags are used
* Refactored listItemHandler check into functions and
  added FIXME possible rewriting of that check.
* Protected serializeDOM in a try-catch handler to
  catch exceptions and output the exception to the console.

Change-Id: I3d351c06e4b86baeb5a55243b11dbfa9baca5bb7
---
 .../parser/mediawiki.WikitextSerializer.js    | 159 +++++++++++++-----
 1 file changed, 121 insertions(+), 38 deletions(-)
diff --git a/modules/parser/mediawiki.WikitextSerializer.js b/modules/parser/mediawiki.WikitextSerializer.js
index 0b540e91ff..3adb440977 100644
--- a/modules/parser/mediawiki.WikitextSerializer.js
+++ b/modules/parser/mediawiki.WikitextSerializer.js
@@ -16,13 +16,42 @@ var PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer;
 
 var WSP = WikitextSerializer.prototype;
 
-WSP.defaultOptions = {
-	onNewline: true, // actually following start of file or a real newline
-	onStartOfLine : true, // in start-of-line context, not necessarily after newline
+/* *********************************************************************
+ * Here is what the state attributes mean:
+ *
+ * listStack
+ *    Stack of list contexts to let us emit wikitext for nested lists.
+ *    Each context keeps track of 3 values:
+ *    - itemBullet: the wikitext bullet char for this list
+ *    - itemCount : # of list items encountered so far for the list 
+ *    - bullets   : cumulative bullet prefix based on all the lists
+ *                  that enclose the current list
+ *
+ * onNewline
+ *    true on start of file or after a new line has been emitted.
+ *
+ * onStartOfLine
+ *    true when onNewline is true, and also in other start-of-line contexts
+ *    Ex: after a comment has been emitted, or after include/noinclude tokens.
+ *
+ * singleLineMode
+ *    - if (> 0), we cannot emit any newlines.
+ *    - this value changes as we entire/exit dom subtrees that require
+ *      single-line wikitext output. WSP._tagHandlers specify single-line
+ *      mode for individual tags.
+ *
+ * availableNewlineCount
+ *    # of newlines that have been encountered so far but not emitted yet.
+ *    Newlines are buffered till they need to be output.  This lets us
+ *    swallow newlines in contexts where they shouldn't be emitted for
+ *    ensuring equivalent wikitext output. (ex dom: ..</li>\n\n</li>..)
+ * ********************************************************************* */
+WSP.initialState = {
 	listStack: [],
-	lastHandler: null,
-	availableNewlineCount: 0, // collected (and stripped) newlines from the input
-	singleLineMode: 0 // single-line syntactical context: list items, headings etc
+	onNewline: true,
+	onStartOfLine : true,
+	availableNewlineCount: 0,
+	singleLineMode: 0
 };
 
 WSP.escapeWikiText = function ( state, text ) {
@@ -208,29 +237,52 @@ WSP._listEndHandler = function( state, token ) {
 };
 
 WSP._listItemHandler = function ( handler, bullet, state, token ) { 
+	function inStartOfLineContext(state) {
+		return	state.onStartOfLine || 
+				state.emitNewlineOnNextToken ||
+				(state.availableNewlineCount > 0);
+	}
+
+	function isRepeatToken(state, token) {
+		return	state.prevToken.constructor === EndTagTk && 
+				state.prevToken.name === token.name;
+	}
+
+	function isMultiLineDtDdPair(state, token) {
+		return	token.name === 'dd' && 
+				token.dataAttribs.stx !== 'row' &&
+				state.prevTagToken.constructor === EndTagTk &&
+				state.prevTagToken.name === 'dt';
+	}
+
 	var stack   = state.listStack;
 	var curList = stack[stack.length - 1];
 	curList.itemCount++;
-	var res;
 	curList.itemBullet = bullet;
-	if (curList.itemCount > 1 && // don't prefix bullets on the first descent
-			// Check if the item is / will also be in start of line context,
-			// and prefix all bullets if so.
-			// XXX gwicke: abstract out the 'will be on start of line if
-			// output is not empty' bit to method or flag.
-		(	( state.onStartOfLine ||
-				state.availableNewlineCount ||
-				state.emitNewlineOnNextToken || 
-				// separation between the same tokens would be triggered
-				( state.prevToken.constructor === EndTagTk && 
-					state.prevToken.name === token.name) ) || 
-			// insert a newline before the dd unless specified differently on
-			// the prevTagToken
-			(   token.name === 'dd' && token.dataAttribs.stx !== 'row' &&
-				state.prevTagToken.constructor === EndTagTk &&
-				state.prevTagToken.name === 'dt'
-			)
-		) 
+
+	// Output bullet prefix only if:
+	// - this is not the first list item
+	// - we are either in:
+	//    * a new line context, 
+	//    * seeing an identical token as the last one (..</li><li>...)
+	//      (since we are in this handler on encountering a list item token,
+	//       this means we are the 2nd or later item in the list, BUT without
+	//       any intervening new lines or other tokens in between)
+	//    * on the dd part of a multi-line dt-dd pair
+	//      (The dd on a single-line dt-dd pair sticks to the dt.
+	//       which means it won't get the bullets that the dt already got).
+	//
+	// SSS FIXME: This condition could be rephrased as:
+	//
+	// if (isRepeatToken(state, token) ||
+	//     (curList.itemCount > 1 && (inStartOfLineContext(state) || isMultiLineDtDdPair(state, token))))
+	//
+	var res;
+	if (curList.itemCount > 1 && 
+		(	inStartOfLineContext(state) ||
+			isRepeatToken(state, token) ||
+			isMultiLineDtDdPair(state, token)
+		)
 	)
 	{
 		handler.startsNewline = true;
@@ -373,6 +425,33 @@ WSP._linkEndHandler = function( state, token ) {
 	}
 };
 
+/* *********************************************************************
+ * startsNewline
+ *     if true, the wikitext for the dom subtree rooted
+ *     at this html tag requires a new line context.
+ *
+ * endsLine
+ *     if true, the wikitext for the dom subtree rooted
+ *     at this html tag ends the line.
+ *
+ * pairsSepNlCount
+ *     # of new lines required between wikitext for dom siblings
+ *     of the same tag type (..</p><p>.., etc.)
+ *
+ * newlineTransparent
+ *     if true, this token does not change the newline status
+ *     after it is emitted.
+ *
+ * singleLine
+ *     if 1, the wikitext for the dom subtree rooted at this html tag
+ *     requires all content to be emitted on the same line without 
+ *     any line breaks. +1 sets the single-line mode (on descending
+ *     the dom subtree), -1 clears the single-line mod (on exiting
+ *     the dom subtree).
+ *
+ * ignore
+ *     if true, the serializer pretends as if it never saw this token.
+ * ********************************************************************* */
 WSP.tagHandlers = {
 	body: {
 		start: {
@@ -662,7 +741,7 @@ WSP._serializeAttributes = function ( attribs ) {
  * Serialize a chunk of tokens
  */
 WSP.serializeTokens = function( tokens, chunkCB ) {
-	var state = $.extend({}, this.defaultOptions, this.options),
+	var state = $.extend({}, this.initialState, this.options),
 		i, l;
 	if ( chunkCB === undefined ) {
 		var out = [];
@@ -879,18 +958,22 @@ WSP._serializeToken = function ( state, token ) {
  * Serialize an HTML DOM document.
  */
 WSP.serializeDOM = function( node, chunkCB ) {
-	var state = $.extend({}, this.defaultOptions, this.options);
-	//console.warn( node.innerHTML );
-	if ( ! chunkCB ) {
-		var out = [];
-		state.chunkCB = out.push.bind( out );
-		this._serializeDOM( node, state );
-		this._serializeToken( state, new EOFTk() );
-		return out.join('');
-	} else {
-		state.chunkCB = chunkCB;
-		this._serializeDOM( node, state );
-		this._serializeToken( state, new EOFTk() );
+	try {
+		var state = $.extend({}, this.initialState, this.options);
+		//console.warn( node.innerHTML );
+		if ( ! chunkCB ) {
+			var out = [];
+			state.chunkCB = out.push.bind( out );
+			this._serializeDOM( node, state );
+			this._serializeToken( state, new EOFTk() );
+			return out.join('');
+		} else {
+			state.chunkCB = chunkCB;
+			this._serializeDOM( node, state );
+			this._serializeToken( state, new EOFTk() );
+		}
+	} catch (e) {
+		console.warn(e.stack);
 	}
 };