From 5f584909e131de1606d812a8256f9ab3c00fec64 Mon Sep 17 00:00:00 2001 From: Subramanya Sastry Date: Fri, 22 Jun 2012 17:51:50 -0500 Subject: [PATCH] Added documentation + minor code refactoring * Renamed defaultOptions to initialState * Got rid of unused state property * Added comments explaining how state attributes and tag handler flags are used * Refactored listItemHandler check into functions and added FIXME possible rewriting of that check. * Protected serializeDOM in a try-catch handler to catch exceptions and output the exception to the console. Change-Id: I3d351c06e4b86baeb5a55243b11dbfa9baca5bb7 --- .../parser/mediawiki.WikitextSerializer.js | 159 +++++++++++++----- 1 file changed, 121 insertions(+), 38 deletions(-) diff --git a/modules/parser/mediawiki.WikitextSerializer.js b/modules/parser/mediawiki.WikitextSerializer.js index 0b540e91ff..3adb440977 100644 --- a/modules/parser/mediawiki.WikitextSerializer.js +++ b/modules/parser/mediawiki.WikitextSerializer.js @@ -16,13 +16,42 @@ var PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer; var WSP = WikitextSerializer.prototype; -WSP.defaultOptions = { - onNewline: true, // actually following start of file or a real newline - onStartOfLine : true, // in start-of-line context, not necessarily after newline +/* ********************************************************************* + * Here is what the state attributes mean: + * + * listStack + * Stack of list contexts to let us emit wikitext for nested lists. + * Each context keeps track of 3 values: + * - itemBullet: the wikitext bullet char for this list + * - itemCount : # of list items encountered so far for the list + * - bullets : cumulative bullet prefix based on all the lists + * that enclose the current list + * + * onNewline + * true on start of file or after a new line has been emitted. 
+ * + * onStartOfLine + * true when onNewline is true, and also in other start-of-line contexts + * Ex: after a comment has been emitted, or after include/noinclude tokens. + * + * singleLineMode + * - if (> 0), we cannot emit any newlines. + * - this value changes as we enter/exit dom subtrees that require + * single-line wikitext output. WSP.tagHandlers specify single-line + * mode for individual tags. + * + * availableNewlineCount + * # of newlines that have been encountered so far but not emitted yet. + * Newlines are buffered till they need to be output. This lets us + * swallow newlines in contexts where they shouldn't be emitted for + * ensuring equivalent wikitext output. (ex dom: ..\n\n..) + * ********************************************************************* */ +WSP.initialState = { listStack: [], - lastHandler: null, - availableNewlineCount: 0, // collected (and stripped) newlines from the input - singleLineMode: 0 // single-line syntactical context: list items, headings etc + onNewline: true, + onStartOfLine : true, + availableNewlineCount: 0, + singleLineMode: 0 }; WSP.escapeWikiText = function ( state, text ) { @@ -208,29 +237,52 @@ WSP._listEndHandler = function( state, token ) { }; WSP._listItemHandler = function ( handler, bullet, state, token ) { + function inStartOfLineContext(state) { + return state.onStartOfLine || + state.emitNewlineOnNextToken || + (state.availableNewlineCount > 0); + } + + function isRepeatToken(state, token) { + return state.prevToken.constructor === EndTagTk && + state.prevToken.name === token.name; + } + + function isMultiLineDtDdPair(state, token) { + return token.name === 'dd' && + token.dataAttribs.stx !== 'row' && + state.prevTagToken.constructor === EndTagTk && + state.prevTagToken.name === 'dt'; + } + var stack = state.listStack; var curList = stack[stack.length - 1]; curList.itemCount++; - var res; curList.itemBullet = bullet; - if (curList.itemCount > 1 && // don't prefix bullets on the first descent - // 
Check if the item is / will also be in start of line context, - // and prefix all bullets if so. - // XXX gwicke: abstract out the 'will be on start of line if - // output is not empty' bit to method or flag. - ( ( state.onStartOfLine || - state.availableNewlineCount || - state.emitNewlineOnNextToken || - // separation between the same tokens would be triggered - ( state.prevToken.constructor === EndTagTk && - state.prevToken.name === token.name) ) || - // insert a newline before the dd unless specified differently on - // the prevTagToken - ( token.name === 'dd' && token.dataAttribs.stx !== 'row' && - state.prevTagToken.constructor === EndTagTk && - state.prevTagToken.name === 'dt' - ) - ) + + // Output bullet prefix only if: + // - this is not the first list item + // - we are either in: + // * a new line context, + // * seeing an identical token as the last one (..
  • ...) + // (since we are in this handler on encountering a list item token, + // this means we are the 2nd or later item in the list, BUT without + // any intervening new lines or other tokens in between) + // * on the dd part of a multi-line dt-dd pair + // (The dd on a single-line dt-dd pair sticks to the dt. + // which means it won't get the bullets that the dt already got). + // + // SSS FIXME: This condition could be rephrased as: + // + // if (isRepeatToken(state, token) || + // (curList.itemCount > 1 && (inStartOfLineContext(state) || isMultiLineDtDdPair(state, token)))) + // + var res; + if (curList.itemCount > 1 && + ( inStartOfLineContext(state) || + isRepeatToken(state, token) || + isMultiLineDtDdPair(state, token) + ) ) { handler.startsNewline = true; @@ -373,6 +425,33 @@ WSP._linkEndHandler = function( state, token ) { } }; +/* ********************************************************************* + * startsNewline + * if true, the wikitext for the dom subtree rooted + * at this html tag requires a new line context. + * + * endsLine + * if true, the wikitext for the dom subtree rooted + * at this html tag ends the line. + * + * pairsSepNlCount + * # of new lines required between wikitext for dom siblings + * of the same tag type (..

    .., etc.) + * + * newlineTransparent + * if true, this token does not change the newline status + * after it is emitted. + * + * singleLine + * if 1, the wikitext for the dom subtree rooted at this html tag + * requires all content to be emitted on the same line without + * any line breaks. +1 sets the single-line mode (on descending + * the dom subtree), -1 clears the single-line mode (on exiting + * the dom subtree). + * + * ignore + * if true, the serializer pretends as if it never saw this token. + * ********************************************************************* */ WSP.tagHandlers = { body: { start: { @@ -662,7 +741,7 @@ WSP._serializeAttributes = function ( attribs ) { * Serialize a chunk of tokens */ WSP.serializeTokens = function( tokens, chunkCB ) { - var state = $.extend({}, this.defaultOptions, this.options), + var state = $.extend({}, this.initialState, this.options), i, l; if ( chunkCB === undefined ) { var out = []; @@ -879,18 +958,22 @@ WSP._serializeToken = function ( state, token ) { * Serialize an HTML DOM document. */ WSP.serializeDOM = function( node, chunkCB ) { - var state = $.extend({}, this.defaultOptions, this.options); - //console.warn( node.innerHTML ); - if ( ! chunkCB ) { - var out = []; - state.chunkCB = out.push.bind( out ); - this._serializeDOM( node, state ); - this._serializeToken( state, new EOFTk() ); - return out.join(''); - } else { - state.chunkCB = chunkCB; - this._serializeDOM( node, state ); - this._serializeToken( state, new EOFTk() ); + try { + var state = $.extend({}, this.initialState, this.options); + //console.warn( node.innerHTML ); + if ( ! chunkCB ) { + var out = []; + state.chunkCB = out.push.bind( out ); + this._serializeDOM( node, state ); + this._serializeToken( state, new EOFTk() ); + return out.join(''); + } else { + state.chunkCB = chunkCB; + this._serializeDOM( node, state ); + this._serializeToken( state, new EOFTk() ); + } + } catch (e) { + console.warn(e.stack); } };