diff --git a/modules/parser/ext.core.ListHandler.js b/modules/parser/ext.core.ListHandler.js new file mode 100644 index 0000000000..b0a65b2a1c --- /dev/null +++ b/modules/parser/ext.core.ListHandler.js @@ -0,0 +1,157 @@ +/* + * Create list tag around list items and map wiki bullet levels to html + */ + +function ListHandler ( manager ) { + this.manager = manager + this.reset(); + this.manager.addTransform( this.onListItem.bind(this), + this.listRank, 'tag', 'listItem' ); + this.manager.addTransform( this.onEnd.bind(this), + this.listRank, 'end' ); +} + +ListHandler.prototype.listRank = 2.49; // before PostExpandParagraphHandler +ListHandler.prototype.delta = .001; + +ListHandler.prototype.bulletCharsMap = { + '*': { list: 'ul', item: 'li' }, + '#': { list: 'ol', item: 'li' }, + ';': { list: 'dl', item: 'dt' }, + ':': { list: 'dl', item: 'dd' }, +}; + +ListHandler.prototype.reset = function() { + this.newline = false; + this.bstack = []; // Bullet stack, previous element's listStyle + this.endtags = []; // Stack of end tags +}; + +ListHandler.prototype.onNewline = function ( token, frame, prevToken ) { + var tokens = []; + token.rank = this.listRank + this.delta; + if (!this.bstack.length) { + tokens.push(token); + } + else + { + if (this.newline) + tokens = tokens.concat(this.end()); + + this.newline = true; + } + return { tokens: tokens }; +}; + +ListHandler.prototype.onEnd = function( token, frame, prevToken ) { + return { tokens: this.end() }; +}; + +ListHandler.prototype.end = function( ) { + // pop all open list item tokens + var tokens = this.popTags(this.bstack.length); + this.reset(); + this.manager.removeTransform( this.listRank, 'newline' ); + return tokens; +}; + +ListHandler.prototype.onListItem = function ( token, frame, prevToken ) { + if (token.constructor === TagTk) + { + // convert listItem to list and list item tokens + return { tokens: this.doListItem( this.bstack, token.bullets ) }; + } + return { token: token }; +}; + +ListHandler.prototype.commonPrefixLength = function (x, y) { + var minLength = Math.min(x.length, y.length); + for(var i = 0; i < minLength; i++) { + if (x[i] != y[i]) + break; + } + return i; +}; + +ListHandler.prototype.pushList = function ( container ) { + this.endtags.push( new EndTagTk( container.list )); + this.endtags.push( new EndTagTk( container.item )); + return [ + new TagTk( container.list ), + new TagTk( container.item ) + ]; +}; + +ListHandler.prototype.popTags = function ( n ) { + var tokens = []; + for(;n > 0; n--) { + // push list item.. + tokens.push(this.endtags.pop()); + // and the list end tag + tokens.push(this.endtags.pop()); + } + return tokens; +}; + +ListHandler.prototype.isDlDd = function (a, b) { + var ab = [a,b].sort(); + return (ab[0] === ':' && ab[1] === ';'); +}; + +ListHandler.prototype.doListItem = function ( bs, bn ) { + var prefixLen = this.commonPrefixLength (bs, bn), + changeLen = Math.max(bs.length, bn.length) - prefixLen, + prefix = bn.slice(0, prefixLen); + this.newline = false; + this.bstack = bn; + if (!bs.length) + { + this.manager.addTransform( this.onNewline.bind(this), + this.listRank, 'newline' ); + } + // emit close tag tokens for closed lists + if (changeLen === 0) + { + var itemToken = this.endtags.pop(); + this.endtags.push(new EndTagTk( itemToken.name )); + return [ + itemToken, + new TagTk( itemToken.name ) + ]; + } + else if ( bs.length == bn.length + && changeLen == 1 + && this.isDlDd( bs[prefixLen], bn[prefixLen] ) ) + { + // handle dd/dt transitions + var newName = this.bulletCharsMap[bn[prefixLen]].item; + this.endtags.push(new EndTagTk( newName )); + return [ + this.endtags.pop(), + new TagTk( newName ) + ]; + } + else + { + var tokens = this.popTags(bs.length - prefixLen); + + if (prefixLen > 0 && bn.length == prefixLen ) { + var itemToken = this.endtags.pop(); + tokens.push(itemToken); + tokens.push(new TagTk( itemToken.name )); + this.endtags.push(new EndTagTk( itemToken.name )); + } + + for(var i = prefixLen; i < bn.length; i++) { + if (!this.bulletCharsMap[bn[i]]) + throw("Unknown node prefix " + prefix[i]); + + tokens = tokens.concat(this.pushList(this.bulletCharsMap[bn[i]])); + } + return tokens; + } +}; + +if (typeof module == "object") { + module.exports.ListHandler = ListHandler; +} diff --git a/modules/parser/mediawiki.parser.js b/modules/parser/mediawiki.parser.js index 3b9b5673c4..60afd5dbfb 100644 --- a/modules/parser/mediawiki.parser.js +++ b/modules/parser/mediawiki.parser.js @@ -34,6 +34,7 @@ var fs = require('fs'), Sanitizer = require('./ext.core.Sanitizer.js').Sanitizer, TemplateHandler = require('./ext.core.TemplateHandler.js').TemplateHandler, AttributeExpander = require('./ext.core.AttributeExpander.js').AttributeExpander, + ListHandler = require('./ext.core.ListHandler.js').ListHandler, LinkHandler = require('./ext.core.LinkHandler.js'), WikiLinkHandler = LinkHandler.WikiLinkHandler, ExternalLinkHandler = LinkHandler.ExternalLinkHandler, @@ -132,7 +133,7 @@ ParserPipelineFactory.prototype.recipes = { [ // text/wiki-specific tokens QuoteTransformer, - /* ListHandler, */ + ListHandler, Cite, PostExpandParagraphHandler, Sanitizer diff --git a/modules/parser/pegTokenizer.pegjs.txt b/modules/parser/pegTokenizer.pegjs.txt index 7f57530c5e..3565af9dee 100644 --- a/modules/parser/pegTokenizer.pegjs.txt +++ b/modules/parser/pegTokenizer.pegjs.txt @@ -131,143 +131,6 @@ }); }; - /* - * Annotate a token stream with list items with appropriate list tokens - * - * XXX: Move this to a token handler in phase sync23! That way we can - * support list items from templates too. - * - * @static - * @method - * @param {[tokens]} Token stream with li tokens - * @returns {[tokens]} Token stream, possibly with additional list tokens - * */ - var annotateList = function ( tokens ) { - var out = [], // List of tokens - bstack = [], // Bullet stack, previous element's listStyle - bnext = [], // Next element's listStyle - endtags = []; // Stack of end tags - - var commonPrefixLength = function (x, y) { - var minLength = Math.min(x.length, y.length); - for(var i = 0; i < minLength; i++) { - if (x[i] != y[i]) - break; - } - return i; - }; - - var pushList = function ( listName, itemName ) { - out.push( new TagTk( listName )); - out.push( new TagTk( itemName )); - endtags.push( new EndTagTk( listName )); - endtags.push( new EndTagTk( itemName )); - }; - - var popTags = function ( n ) { - for(;n > 0; n--) { - // push list item.. - out.push(endtags.pop()); - // and the list end tag - out.push(endtags.pop()); - } - }; - - var isDlDd = function (a, b) { - var ab = [a,b].sort(); - return (ab[0] === ':' && ab[1] === ';'); - }; - - var doListItem = function ( bs, bn ) { - var prefixLen = commonPrefixLength (bs, bn); - var changeLen = Math.max(bs.length, bn.length) - prefixLen; - var prefix = bn.slice(0, prefixLen); - // emit close tag tokens for closed lists - if (changeLen === 0) { - var itemToken = endtags.pop(); - out.push(itemToken); - out.push(new TagTk( itemToken.name )); - endtags.push(new EndTagTk( itemToken.name )); - } else if ( bs.length == bn.length - && changeLen == 1 - && isDlDd( bs[prefixLen], bn[prefixLen] ) ) { - // handle dd/dt transitions - out.push(endtags.pop()); - if( bn[prefixLen] == ';') { - var newName = 'dt'; - } else { - var newName = 'dd'; - } - out.push(new TagTk( newName )); - endtags.push(new EndTagTk( newName )); - } else { - popTags(bs.length - prefixLen); - - if (prefixLen > 0 && bn.length == prefixLen ) { - var itemToken = endtags.pop(); - out.push(itemToken); - out.push(new TagTk( itemToken.name )); - endtags.push(new EndTagTk( itemToken.name )); - } - - for(var i = prefixLen; i < bn.length; i++) { - switch (bn[i]) { - case '*': - pushList('ul', 'li'); - break; - case '#': - pushList('ol', 'li'); - break; - case ';': - pushList('dl', 'dt'); - break; - case ':': - pushList('dl', 'dd'); - break; - default: - throw("Unknown node prefix " + prefix[i]); - } - } - } - }; - - for (var i = 0, length = tokens.length; i < length; i++) { - var token = tokens[i]; - switch ( token.constructor ) { - case TagTk: - switch (token.name) { - case 'list': - // ignore token - break; - case 'listItem': - // convert listItem to list and list item tokens - bnext = token.bullets; - doListItem( bstack, bnext ); - bstack = bnext; - break; - default: - // pass through all remaining start tags - out.push(token); - break; - } - break; - case EndTagTk: - if ( token.name == 'list' ) { - // pop all open list item tokens - popTags(bstack.length); - bstack = []; - } else { - out.push(token); - } - break; - default: - out.push(token); - break; - } - } - return out; - }; - /** * Determine if a string represents a valid ISBN-10 or ISBN-13 identifier @@ -1437,12 +1300,7 @@ block_tag /********************************************************* * Lists *********************************************************/ -lists = e:(dtdd / li) es:(sol (dtdd / li))* -{ - return annotateList( [ new TagTk( 'list' ) ] - .concat(flatten([e].concat(es)) - ,[ new EndTagTk( 'list' ) ])); -} +lists = (dtdd / li) (sol (dtdd / li))* li = bullets:list_char+ c:inlineline? @@ -1456,7 +1314,7 @@ li = bullets:list_char+ } dtdd - = bullets:(!(";" !list_char) list_char)* + = bullets:(!(";" !list_char) lc:list_char { return lc })* ";" & {return stops.inc('colon');} c:inlineline @@ -1475,7 +1333,8 @@ dtdd // c[clen - 1].value = val.substr(0, val.length - 1) + "\u00a0"; // } //} - + + bullets = bullets.join(''); var li = new TagTk( 'listItem' ); li.bullets = bullets + ";"; var li2 = new TagTk( 'listItem' ); diff --git a/tests/parser/parserTests-whitelist.js b/tests/parser/parserTests-whitelist.js index 23efecdf4e..a37b196160 100644 --- a/tests/parser/parserTests-whitelist.js +++ b/tests/parser/parserTests-whitelist.js @@ -18,7 +18,7 @@ testWhiteList["Link containing double-single-quotes '' in text embedded in itali testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "

Some pretty italics and stuff!

"; // This is a rare edge case, and the new behavior is arguably more consistent -testWhiteList["5 quotes, code coverage +1 line"] = "

'

"; +testWhiteList["5 quotes, code coverage +1 line"] = "

"; // The comment in the test already suggests this result as correct, but // supplies the old result without preformatting.