diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..98b092a984 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.svn +*~ +*.kate-swp +.*.swp diff --git a/VisualEditor.i18n.php b/VisualEditor.i18n.php index 62fde59414..c560d2bd12 100644 --- a/VisualEditor.i18n.php +++ b/VisualEditor.i18n.php @@ -289,10 +289,34 @@ $messages['ia'] = array( ); /** Italian (Italiano) + * @author Beta16 * @author F. Cosoleto */ $messages['it'] = array( - 'visualeditor-feedback-prompt' => 'Lascia feedback', + 'visualeditor' => 'VisualEditor', + 'visualeditorsandbox' => 'Prova editor visivo', + 'visualeditor-desc' => 'Editor visivo per MediaWiki', + 'visualeditor-sandbox-title' => "Pagina delle prove per l'editor visivo", + 'visualeditor-tooltip-wikitext' => 'Mostra/nascondi wikitesto', + 'visualeditor-tooltip-json' => 'Mostra/nascondi JSON', + 'visualeditor-tooltip-html' => 'Mostra/nascondi HTML', + 'visualeditor-tooltip-render' => 'Mostra/nascondi anteprima', + 'visualeditor-tooltip-history' => 'Mostra/nascondi azioni', + 'visualeditor-tooltip-help' => 'Mostra/nascondi aiuto', + 'visualeditor-feedback-prompt' => 'Lascia un commento', + 'visualeditor-feedback-dialog-title' => "Lascia un commento sulla pagina delle prove per l'editor visivo", +); + +/** Japanese (日本語) + * @author Shirayuki + */ +$messages['ja'] = array( + 'visualeditor-tooltip-wikitext' => 'ウィキテキストの表示を切り替え', + 'visualeditor-tooltip-json' => 'JSON 表示を切り替え', + 'visualeditor-tooltip-html' => 'HTML 表示を切り替え', + 'visualeditor-tooltip-render' => 'プレビューを切り替え', + 'visualeditor-tooltip-help' => 'ヘルプ表示を切り替え', + 'visualeditor-feedback-prompt' => 'フィードバックを残す', ); /** Luxembourgish (Lëtzebuergesch) diff --git a/modules/parser/ext.Cite.js b/modules/parser/ext.Cite.js index 7b452704f8..dc9128d487 100644 --- a/modules/parser/ext.Cite.js +++ b/modules/parser/ext.Cite.js @@ -1,153 +1,50 @@ +var TokenCollector = require( './ext.util.TokenCollector.js' ).TokenCollector; + /** * Simple token transform version of the Cite extension. * * @class * @constructor */ -function Cite ( dispatcher ) { +function Cite ( manager, isInclude ) { + this.manager = manager; this.refGroups = {}; - this.refTokens = []; - // Within ref block - this.isActive = false; - this.register( dispatcher ); + // Set up the collector for ref sections + new TokenCollector( + manager, + this.handleRef.bind(this), + true, // match the end-of-input if is missing + this.rank, + 'tag', + 'ref' + ); + // And register for references tags + manager.addTransform( this.onReferences.bind(this), + this.referencesRank, 'tag', 'references' ); } -/** - * Register with dispatcher. - * - * @method - * @param {Object} TokenTransformDispatcher to register to - */ -Cite.prototype.register = function ( dispatcher ) { - // Register for ref and references tag tokens - var self = this; - this.onRefCB = function (ctx) { - return self.onRef(ctx); - }; - dispatcher.appendListener( this.onRefCB, 'tag', 'ref' ); - dispatcher.appendListener( function (ctx) { - return self.onReferences(ctx); - }, 'tag', 'references' ); - dispatcher.appendListener( function (ctx) { - return self.onEnd(ctx); - }, 'end' ); -}; - +Cite.prototype.rank = 2.15; // after QuoteTransformer, but before PostExpandParagraphHandler +Cite.prototype.referencesRank = 2.6; // after PostExpandParagraphHandler +//Cite.prototype.rank = 2.6; /** - * Convert list of key-value pairs to object, with first entry for a - * key winning. 
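 * Illustratively (first entry wins):
 *   attribsToObject( [ ['id', 'a'], ['id', 'b'] ] )  // -> { id: 'a' }
 * Note that the duplicate check needs explicit parentheses, i.e.
 * if ( !( kv[0] in obj ) ), since ! binds tighter than in; env.KVtoHash(),
 * which the new Cite code uses instead, is assumed to cover the same case.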
- * - * XXX: Move to general utils - * - * @static - * @method - * @param {Array} List of [key, value] pairs - * @returns {Object} Object with key/values set, first entry wins. + * Handle ref section tokens collected by the TokenCollector. */ -Cite.prototype.attribsToObject = function ( attribs ) { - if ( attribs === undefined ) { - return {}; - } - var obj = {}; - for ( var i = 0, l = attribs.length; i < l; i++ ) { - var kv = attribs[i]; - if (! kv[0] in obj) { - obj[kv[0]] = kv[1]; - } - } - return obj; -}; - -/** - * Handle ref tag tokens. - * - * @method - * @param {Object} TokenContext - * @returns {Object} TokenContext - */ -Cite.prototype.onRef = function ( tokenCTX ) { - - var refGroups = this.refGroups; - - var getRefGroup = function(group) { - if (!(group in refGroups)) { - var refs = [], - byName = {}; - refGroups[group] = { - refs: refs, - byName: byName, - add: function(tokens, options) { - var ref; - if (options.name && options.name in byName) { - ref = byName[options.name]; - } else { - var n = refs.length; - var key = n + ''; - if (options.name) { - key = options.name + '-' + key; - } - ref = { - tokens: tokens, - index: n, - groupIndex: n, // @fixme - name: options.name, - group: options.group, - key: key, - target: 'cite_note-' + key, - linkbacks: [] - }; - refs[n] = ref; - if (options.name) { - byName[options.name] = ref; - } - } - ref.linkbacks.push( - 'cite_ref-' + ref.key + '-' + ref.linkbacks.length - ); - return ref; - } - }; - } - return refGroups[group]; - }; - - var token = tokenCTX.token; - // Collect all tokens between ref start and endtag - if ( ! this.isActive && - token.type === 'TAG' && - token.name.toLowerCase() === 'ref' ) { - this.curRef = tokenCTX.token; - // Prepend self for 'any' token type - tokenCTX.dispatcher.prependListener(this.onRefCB, 'any' ); - tokenCTX.token = null; - this.isActive = true; - return tokenCTX; - } else if ( this.isActive && - // Also accept really broken ref close tags.. - [TagTk, EndTagTk, SelfclosingTagTk].indexOf(token.constructor) >= 0 && - token.name.toLowerCase() === 'ref' - ) - { - this.isActive = false; - tokenCTX.dispatcher.removeListener(this.onRefCB, 'any' ); - // fall through for further processing! 
- } else { - // Inside ref block: Collect all other tokens in refTokens and abort - //console.warn(JSON.stringify(tokenCTX.token, null, 2)); - this.refTokens.push(tokenCTX.token); - tokenCTX.token = null; - return tokenCTX; +Cite.prototype.handleRef = function ( tokens ) { + // remove the first ref tag + var startTag = tokens.shift(); + if ( tokens[tokens.length - 1].name === 'ref' ) { + tokens.pop(); } var options = $.extend({ name: null, group: null - }, this.attribsToObject(this.curRef.attribs)); + }, this.manager.env.KVtoHash(startTag.attribs)); - var group = getRefGroup(options.group); - var ref = group.add(this.refTokens, options); - this.refTokens = []; + var group = this.getRefGroup(options.group); + var ref = group.add(tokens, options); + //console.warn( 'added tokens: ' + JSON.stringify( this.refGroups, null, 2 )); var linkback = ref.linkbacks[ref.linkbacks.length - 1]; @@ -158,37 +55,26 @@ Cite.prototype.onRef = function ( tokenCTX ) { //bits.push(env.formatNum( ref.groupIndex + 1 )); bits.push(ref.groupIndex + 1); - tokenCTX.token = [ - { - type: 'TAG', - name: 'span', - attribs: [ - ['id', linkback], - ['class', 'reference'], - // ignore element when serializing back to wikitext - ['data-nosource', ''] - ] - }, - { - type: 'TAG', - name: 'a', - attribs: [ - ['data-type', 'hashlink'], - ['href', '#' + ref.target] - // XXX: Add round-trip info here? - ] - }, - '[' + bits.join(' ') + ']', - { - type: 'ENDTAG', - name: 'a' - }, - { - type: 'ENDTAG', - name: 'span' - } + var res = [ + new TagTk('span', [ + new KV('id', linkback), + new KV('class', 'reference'), + // ignore element when serializing back to wikitext + new KV('data-nosource', '') + ] + ), + new TagTk( 'a', [ + new KV('data-type', 'hashlink'), + new KV('href', '#' + ref.target) + // XXX: Add round-trip info here? + ] + ), + '[' + bits.join(' ') + ']', + new EndTagTk( 'a' ), + new EndTagTk( 'span' ) ]; - return tokenCTX; + //console.warn( 'ref res: ' + JSON.stringify( res, null, 2 ) ); + return { tokens: res }; }; /** @@ -198,107 +84,121 @@ Cite.prototype.onRef = function ( tokenCTX ) { * @param {Object} TokenContext * @returns {Object} TokenContext */ -Cite.prototype.onReferences = function ( tokenCTX ) { +Cite.prototype.onReferences = function ( token, manager ) { + if ( token.constructor === EndTagTk ) { + return {}; + } + + //console.warn( 'references refGroups:' + JSON.stringify( this.refGroups, null, 2 ) ); var refGroups = this.refGroups; var arrow = '↑'; var renderLine = function( ref ) { - //console.warn('reftokens: ' + JSON.stringify(ref.tokens, null, 2)); - var out = [{ - type: 'TAG', - name: 'li', - attribs: [['id', ref.target]] - }]; + var out = [ new TagTk('li', [new KV('id', ref.target)] ) ]; if (ref.linkbacks.length == 1) { out = out.concat([ - { - type: 'TAG', - name: 'a', - attribs: [ - ['data-type', 'hashlink'], - ['href', '#' + ref.linkbacks[0]] - ] - }, - {type: 'TEXT', value: arrow}, - {type: 'ENDTAG', name: 'a'} + new TagTk( 'a', [ + new KV('href', '#' + ref.linkbacks[0]) + ] + ), + arrow, + new EndTagTk( 'a' ) ], ref.tokens // The original content tokens ); } else { - out.content.push({type: 'TEXT', value: arrow}); + out.push( arrow ); $.each(ref.linkbacks, function(i, linkback) { out = out.concat([ - { - type: 'TAG', - name: 'a', - attribs: [ - ['data-type', 'hashlink'], - ['href', '#' + ref.linkbacks[0]] + new TagTk( 'a', [ + new KV('data-type', 'hashlink'), + new KV('href', '#' + ref.linkbacks[0]) ] - }, + ), // XXX: make formatNum available! 
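				// For orientation, the ref marker emitted by handleRef above and the
				// list items built here roughly serialize to (ids are illustrative):
				//   <span id="cite_ref-0-0" class="reference" data-nosource="">
				//     <a data-type="hashlink" href="#cite_note-0">[1]</a>
				//   </span>
				// and, inside <ol class="references">:
				//   <li id="cite_note-0"><a href="#cite_ref-0-0">↑</a> ...ref content...</li>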
//{ // type: 'TEXT', // value: env.formatNum( ref.groupIndex + '.' + i) //}, - {type: 'TEXT', value: ref.groupIndex + '.' + i}, - {type: 'ENDTAG', name: 'a'} + ref.groupIndex + '.' + i, + new EndTagTk( 'a' ) ], ref.tokens // The original content tokens ); }); } + //console.warn( 'renderLine res: ' + JSON.stringify( out, null, 2 )); return out; }; - var token = tokenCTX.token; + var res; var options = $.extend({ name: null, group: null - }, this.attribsToObject(token.attribs)); + }, this.manager.env.KVtoHash(token.attribs)); if (options.group in refGroups) { var group = refGroups[options.group]; var listItems = $.map(group.refs, renderLine); - tokenCTX.token = [ - { - type: 'TAG', - name: 'ol', - attribs: [ - ['class', 'references'], - ['data-object', 'references'] // Object type - ] - } - ].concat( listItems, { type: 'ENDTAG', name: 'ol' } ); + res = [ + new TagTk( 'ol', [ + new KV('class', 'references'), + new KV('data-object', 'references') // Object type + ] + ) + ].concat( listItems, [ new EndTagTk( 'ol' ) ] ); } else { - tokenCTX.token = { - type: 'SELFCLOSINGTAG', - name: 'placeholder', - attribs: [ - ['data-origNode', JSON.stringify(token)] - ] - }; + res = [ new SelfclosingTagTk( 'meta', [ new KV('fixme', 'add-rdfa-rt-info') ] ) ]; } - return tokenCTX; + res = res.map( this.manager.env.setTokenRank.bind( res, this.referencesRank ) ); + //console.warn( 'references res: ' + JSON.stringify( res, null, 2 ) ); + return { tokens: res }; }; -/** - * Handle end token. - * - * @method - * @param {Object} TokenContext - * @returns {Object} TokenContext - */ -Cite.prototype.onEnd = function ( tokenCTX ) { - // XXX: Emit error messages if references tag was missing! - // Clean up - this.refGroups = {}; - this.refTokens = []; - this.isActive = false; - return tokenCTX; +Cite.prototype.getRefGroup = function(group) { + var refGroups = this.refGroups; + if (!(group in refGroups)) { + var refs = [], + byName = {}; + refGroups[group] = { + refs: refs, + byName: byName, + add: function(tokens, options) { + var ref; + if (options.name && options.name in byName) { + ref = byName[options.name]; + } else { + var n = refs.length; + var key = n + ''; + if (options.name) { + key = options.name + '-' + key; + } + ref = { + tokens: tokens, + index: n, + groupIndex: n, // @fixme + name: options.name, + group: options.group, + key: key, + target: 'cite_note-' + key, + linkbacks: [] + }; + refs[n] = ref; + if (options.name) { + byName[options.name] = ref; + } + } + ref.linkbacks.push( + 'cite_ref-' + ref.key + '-' + ref.linkbacks.length + ); + return ref; + } + }; + } + return refGroups[group]; }; if (typeof module == "object") { diff --git a/modules/parser/ext.core.AttributeExpander.js b/modules/parser/ext.core.AttributeExpander.js index 260bafddf1..6ce879bec0 100644 --- a/modules/parser/ext.core.AttributeExpander.js +++ b/modules/parser/ext.core.AttributeExpander.js @@ -36,6 +36,7 @@ AttributeExpander.prototype.onToken = function ( token, frame, cb ) { token.constructor === SelfclosingTagTk) && token.attribs && token.attribs.length ) { + // clone the token token = $.extend( {}, token ); token.attribs = token.attribs.slice(); var atm = new AttributeTransformManager( @@ -45,7 +46,7 @@ AttributeExpander.prototype.onToken = function ( token, frame, cb ) { cb( { async: true } ); atm.process( token.attribs ); } else { - cb ( { token: token } ); + cb ( { tokens: [token] } ); } }; @@ -58,7 +59,7 @@ AttributeExpander.prototype._returnAttributes = function ( token, cb, { this.manager.env.dp( 
'AttributeExpander._returnAttributes: ',attributes ); token.attribs = attributes; - cb( { token: token } ); + cb( { tokens: [token] } ); }; if (typeof module == "object") { diff --git a/modules/parser/ext.core.BehaviorSwitchHandler.js b/modules/parser/ext.core.BehaviorSwitchHandler.js index 36f365992f..67a226b685 100644 --- a/modules/parser/ext.core.BehaviorSwitchHandler.js +++ b/modules/parser/ext.core.BehaviorSwitchHandler.js @@ -6,7 +6,7 @@ function BehaviorSwitchHandler( manager, isInclude ) { this.manager.addTransform( this.onBehaviorSwitch.bind( this ), this.rank, 'tag', 'behavior-switch' ); } -BehaviorSwitchHandler.prototype.rank = 1.14; +BehaviorSwitchHandler.prototype.rank = 2.14; BehaviorSwitchHandler.prototype.onBehaviorSwitch = function ( token, manager, cb ) { var env = this.manager.env, diff --git a/modules/parser/ext.core.LinkHandler.js b/modules/parser/ext.core.LinkHandler.js index 399b407aa3..ad39dbe3c0 100644 --- a/modules/parser/ext.core.LinkHandler.js +++ b/modules/parser/ext.core.LinkHandler.js @@ -40,7 +40,15 @@ WikiLinkHandler.prototype.onWikiLink = function ( token, frame, cb ) { // Check if page exists // //console.warn( 'title: ' + JSON.stringify( title ) ); - var obj = new TagTk( 'a', [ new KV( 'href', title.makeLink() ) ] ), + var obj = new TagTk( 'a', + [ + new KV( 'href', title.makeLink() ), + new KV('typeof', 'http://mediawiki.org/rdf/wikilink'), + // Add resource as CURIE- needs global default prefix + // definition. + new KV('resource', '[:' + title.getPrefixedText() + ']') + ] + ), content = token.attribs.slice(1, -1); //console.warn('content: ' + JSON.stringify( content, null, 2 ) ); // XXX: handle trail @@ -98,7 +106,8 @@ WikiLinkHandler.prototype._prefixImageOptions = { 'alt': 'alt', 'page': 'page', 'thumbnail': 'thumb', - 'thumb': 'thumb' + 'thumb': 'thumb', + 'upright': 'aspect' }; WikiLinkHandler.prototype.renderFile = function ( token, frame, cb, title ) { @@ -150,7 +159,7 @@ WikiLinkHandler.prototype.renderFile = function ( token, frame, cb, title ) { } else { var bits = oText.split( '=', 2 ), key = this._prefixImageOptions[ bits[0].trim().toLowerCase() ]; - if ( bits.length > 1 && key) { + if ( bits[0] && key) { oHash[key] = bits[1]; options.push( new KV( key, bits[1] ) ); //console.warn('handle prefix ' + bits ); @@ -184,9 +193,8 @@ WikiLinkHandler.prototype.renderFile = function ( token, frame, cb, title ) { a.dataAttribs = token.dataAttribs; var width, height; - if ( ! height in oHash && ! width in oHash ) { - width = '120px'; - height = '120px'; + if ( ! oHash.height && ! oHash.width ) { + width = '200px'; } else { width = oHash.width; height = oHash.height; @@ -212,7 +220,18 @@ WikiLinkHandler.prototype.renderThumb = function ( token, manager, cb, title, pa a.dataAttribs.optionHash = oHash; a.dataAttribs.optionList = options; - var figurestyle = "width: 125px;", + var width = 165; + + // Handle upright + if ( 'aspect' in oHash ) { + if ( oHash.aspect > 0 ) { + width = width * oHash.aspect; + } else { + width *= 0.75; + } + } + + var figurestyle = "width: " + (width + 5) + "px;", figureclass = "thumb tright thumbinner"; // set horizontal alignment @@ -243,6 +262,7 @@ WikiLinkHandler.prototype.renderThumb = function ( token, manager, cb, title, pa new KV('class', figureclass), new KV('style', figurestyle), new KV('typeof', 'http://mediawiki.org/rdf/Thumb'), + // XXX: define this globally? 
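					// Sketch of the thumb sizing computed above (plain arithmetic on the
					// values in renderThumb; 'upright' maps to 'aspect' via
					// _prefixImageOptions):
					//   no size option  -> img width 165px,    wrapper style "width: 170px;"
					//   |upright=0.6|   -> img width 99px,     wrapper style "width: 104px;"
					//   bare |upright|  -> img width 123.75px (165 * 0.75), wrapper 128.75px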
new KV('prefix', "mw: http://mediawiki.org/rdf/terms/") ] ), @@ -257,11 +277,13 @@ WikiLinkHandler.prototype.renderThumb = function ( token, manager, cb, title, pa 'img', [ new KV('src', path), - new KV('width', '120px'), - new KV('height', '120px'), + new KV('width', width + 'px'), + //new KV('height', '160px'), new KV('class', 'thumbimage'), new KV('alt', oHash.alt || title.key ), - new KV('resource', title.getPrefixedText()) + // Add resource as CURIE- needs global default prefix + // definition. + new KV('resource', '[:' + title.getPrefixedText() + ']') ] ), new EndTagTk( 'a' ), @@ -345,17 +367,25 @@ ExternalLinkHandler.prototype.onUrlLink = function ( token, frame, cb ) { env.tokensToString( env.lookupKV( token.attribs, 'href' ).v ) ); if ( this._isImageLink( href ) ) { - cb( { token: new SelfclosingTagTk( 'img', - [ + cb( { tokens: [ new SelfclosingTagTk( 'img', + [ new KV( 'src', href ), - new KV( 'alt', href.split('/').last() ) - ] - ) + new KV( 'alt', href.split('/').last() ), + new KV('typeof', 'http://mediawiki.org/rdf/externalImage') + ], + { type: 'urllink' } + ) + ] } ); } else { cb( { tokens: [ - new TagTk( 'a', [ new KV( 'href', href ) ] ), + new TagTk( 'a', + [ + new KV( 'href', href ), + new KV('typeof', 'http://mediawiki.org/rdf/externalLink') + ], + { type: 'urllink' } ), href, new EndTagTk( 'a' ) ] @@ -384,7 +414,8 @@ ExternalLinkHandler.prototype.onExtLink = function ( token, manager, cb ) { [ new KV( 'src', src ), new KV( 'alt', src.split('/').last() ) - ] ) + ], + { type: 'extlink' }) ]; } @@ -393,7 +424,11 @@ ExternalLinkHandler.prototype.onExtLink = function ( token, manager, cb ) { [ new TagTk ( 'a', - [ new KV('href', href) ], + [ + new KV('href', href), + new KV('typeof', 'http://mediawiki.org/rdf/externalLink'), + new KV('property', 'http://mediawiki.org/rdf/terms/linkcontent') + ], token.dataAttribs ) ].concat( content, [ new EndTagTk( 'a' )]) diff --git a/modules/parser/ext.core.ListHandler.js b/modules/parser/ext.core.ListHandler.js new file mode 100644 index 0000000000..8d81b608b2 --- /dev/null +++ b/modules/parser/ext.core.ListHandler.js @@ -0,0 +1,150 @@ +/* + * Create list tag around list items and map wiki bullet levels to html + */ + +function ListHandler ( manager ) { + this.manager = manager + this.reset(); + this.manager.addTransform( this.onListItem.bind(this), + this.listRank, 'tag', 'listItem' ); + this.manager.addTransform( this.onEnd.bind(this), + this.listRank, 'end' ); +} + +ListHandler.prototype.listRank = 2.49; // before PostExpandParagraphHandler + +ListHandler.prototype.bulletCharsMap = { + '*': { list: 'ul', item: 'li' }, + '#': { list: 'ol', item: 'li' }, + ';': { list: 'dl', item: 'dt' }, + ':': { list: 'dl', item: 'dd' }, +}; + +ListHandler.prototype.reset = function() { + this.newline = false; // flag to identify a list-less line that terminates + // a list block + this.bstack = []; // Bullet stack, previous element's listStyle + this.endtags = []; // Stack of end tags +}; + +ListHandler.prototype.onNewline = function ( token, frame, prevToken ) { + var tokens = [token]; + if (this.newline) { + // second newline without a list item in between, close the list + tokens = this.end().concat( tokens ); + } + this.newline = true; + return { tokens: tokens }; +}; + +ListHandler.prototype.onEnd = function( token, frame, prevToken ) { + return { tokens: this.end().concat([token]) }; +}; + +ListHandler.prototype.end = function( ) { + // pop all open list item tokens + var tokens = this.popTags(this.bstack.length); + this.reset(); + 
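	// Worked trace of the bullet handling (doListItem below), with emitted
	// tokens shown as HTML tags and content tokens omitted:
	//   * a     bullets ['*']      -> <ul><li> a
	//   ** b    bullets ['*','*']  -> <ul><li> b      (nested inside a's <li>)
	//   * c     bullets ['*']      -> </li></ul></li><li> c
	//   blank line or end of input -> </li></ul>      (via onNewline/onEnd -> end())
	// The 'newline' handler removed below is the one doListItem registers when
	// the first item of a list block is seen.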
this.manager.removeTransform( this.listRank, 'newline' ); + return tokens; +}; + +ListHandler.prototype.onListItem = function ( token, frame, prevToken ) { + if (token.constructor === TagTk){ + // convert listItem to list and list item tokens + return { tokens: this.doListItem( this.bstack, token.bullets ) }; + } + return { token: token }; +}; + +ListHandler.prototype.commonPrefixLength = function (x, y) { + var minLength = Math.min(x.length, y.length); + for(var i = 0; i < minLength; i++) { + if (x[i] != y[i]) + break; + } + return i; +}; + +ListHandler.prototype.pushList = function ( container ) { + this.endtags.push( new EndTagTk( container.list )); + this.endtags.push( new EndTagTk( container.item )); + return [ + new TagTk( container.list ), + new TagTk( container.item ) + ]; +}; + +ListHandler.prototype.popTags = function ( n ) { + var tokens = []; + for(;n > 0; n--) { + // push list item.. + tokens.push(this.endtags.pop()); + // and the list end tag + tokens.push(this.endtags.pop()); + } + return tokens; +}; + +ListHandler.prototype.isDlDd = function (a, b) { + var ab = [a,b].sort(); + return (ab[0] === ':' && ab[1] === ';'); +}; + +ListHandler.prototype.doListItem = function ( bs, bn ) { + var prefixLen = this.commonPrefixLength (bs, bn), + changeLen = Math.max(bs.length, bn.length) - prefixLen, + prefix = bn.slice(0, prefixLen); + this.newline = false; + this.bstack = bn; + if (!bs.length) + { + this.manager.addTransform( this.onNewline.bind(this), + this.listRank, 'newline' ); + } + // emit close tag tokens for closed lists + if (changeLen === 0) + { + var itemToken = this.endtags.pop(); + this.endtags.push(new EndTagTk( itemToken.name )); + return [ + itemToken, + new TagTk( itemToken.name ) + ]; + } + else if ( bs.length == bn.length + && changeLen == 1 + && this.isDlDd( bs[prefixLen], bn[prefixLen] ) ) + { + // handle dd/dt transitions + var newName = this.bulletCharsMap[bn[prefixLen]].item; + this.endtags.push(new EndTagTk( newName )); + return [ + this.endtags.pop(), + new TagTk( newName ) + ]; + } + else + { + var tokens = this.popTags(bs.length - prefixLen); + + if (prefixLen > 0 && bn.length == prefixLen ) { + var itemToken = this.endtags.pop(); + tokens.push(itemToken); + tokens.push(new TagTk( itemToken.name )); + this.endtags.push(new EndTagTk( itemToken.name )); + } + + for(var i = prefixLen; i < bn.length; i++) { + if (!this.bulletCharsMap[bn[i]]) + throw("Unknown node prefix " + prefix[i]); + + tokens = tokens.concat(this.pushList(this.bulletCharsMap[bn[i]])); + } + return tokens; + } +}; + +if (typeof module == "object") { + module.exports.ListHandler = ListHandler; +} diff --git a/modules/parser/ext.core.NoIncludeOnly.js b/modules/parser/ext.core.NoIncludeOnly.js index 815be686c0..cdf5428bfb 100644 --- a/modules/parser/ext.core.NoIncludeOnly.js +++ b/modules/parser/ext.core.NoIncludeOnly.js @@ -44,7 +44,7 @@ OnlyInclude.prototype.onAnyInclude = function ( token, manager ) { var res = this.accum; res.push( token ); this.accum = []; - this.manager.setTokensRank( res, this.rank + 0.001 ); + //this.manager.setTokensRank( res, this.rank + 0.001 ); return { tokens: res }; } else { this.foundOnlyInclude = false; @@ -70,11 +70,11 @@ OnlyInclude.prototype.onAnyInclude = function ( token, manager ) { meta = new TagTk( 'meta' ); meta.dataAttribs = { strippedTokens: [token] }; } - meta.rank = this.rank; + //meta.rank = this.rank; return { token: meta }; } else { if ( this.inOnlyInclude ) { - token.rank = this.rank; + //token.rank = this.rank; return { token: token }; } else { 
this.accum.push( token ); diff --git a/modules/parser/ext.core.ParserFunctions.js b/modules/parser/ext.core.ParserFunctions.js index 786dfc4775..936ce4cb2e 100644 --- a/modules/parser/ext.core.ParserFunctions.js +++ b/modules/parser/ext.core.ParserFunctions.js @@ -17,6 +17,7 @@ * @author Gabriel Wicke */ +var async = require('async'); function ParserFunctions ( manager ) { this.manager = manager; @@ -24,22 +25,52 @@ function ParserFunctions ( manager ) { } // Temporary helper. -ParserFunctions.prototype._rejoinKV = function ( kv ) { - if ( kv.k && kv.k.length ) { - return kv.k.concat( ['='], kv.v ); +ParserFunctions.prototype._rejoinKV = function ( k, v ) { + if ( k.length ) { + return k.concat( ['='], v ); } else { - return kv.v; + return v; } }; +// XXX: move to frame? +ParserFunctions.prototype.expandKV = function ( kv, cb, defaultValue, type ) { + if ( type === undefined ) { + type = 'tokens/x-mediawiki/expanded'; + } + if ( kv === undefined ) { + cb( { tokens: [ defaultValue || '' ] } ); + } else if ( kv.constructor === String ) { + return cb( kv ); + } else if ( kv.k.constructor === String && kv.v.constructor === String ) { + if ( kv.k ) { + cb( { tokens: [kv.k + '=' + kv.v] } ); + } else { + cb( { tokens: [kv.v] } ); + } + } else { + var self = this, + getCB = function ( v ) { + cb ( { tokens: + self._rejoinKV( kv.k, v ) } ); + }; + kv.v.get({ + type: type, + cb: getCB, + asyncCB: cb + }); + } +}; + + ParserFunctions.prototype['pf_#if'] = function ( token, frame, cb, args ) { var target = args[0].k; if ( target.trim() !== '' ) { //this.env.dp('#if, first branch', target.trim(), argDict[1] ); - cb( { tokens: (args[1] && this._rejoinKV( args[1] ) || [] ) } ); + this.expandKV( args[1], cb ); } else { //this.env.dp('#if, second branch', target.trim(), argDict[2] ); - cb( { tokens: (args[2] && this._rejoinKV( args[2] ) || [] ) } ); + this.expandKV( args[2], cb ); } }; @@ -51,16 +82,22 @@ ParserFunctions.prototype._switchLookupFallback = function ( frame, kvs, key, di if ( v && key === v.trim() ) { // found. now look for the next entry with a non-empty key. this.manager.env.dp( 'switch found' ); + cb = function( res ) { cb ( { tokens: res } ); }; for ( var j = 0; j < l; j++) { kv = kvs[j]; // XXX: make sure the key is always one of these! if ( kv.k.length ) { - return cb( { tokens: kv.v } ); + return kv.v.get({ + type: 'tokens/x-mediawiki/expanded', + cb: cb, + asyncCB: cb + }); } } // No value found, return empty string? XXX: check this return cb( { } ); } else if ( kvs.length ) { + // search for value-only entry which matches var i = 0; if ( v ) { i = 1; @@ -68,30 +105,44 @@ ParserFunctions.prototype._switchLookupFallback = function ( frame, kvs, key, di for ( ; i < l; i++ ) { kv = kvs[i]; if ( kv.k.length || !kv.v.length ) { + // skip entries with keys or empty values continue; } else { - if ( ! kv.v.to ) { + if ( ! kv.v.get ) { this.manager.env.ap( kv.v ); console.trace(); } - return kv.v.to( 'text/plain/expanded', - this._switchLookupFallback.bind( this, frame, kvs.slice(i), key, dict, cb ), - cb ); + var self = this; + cb({ async: true }); + return kv.v.get( { + cb: process.nextTick.bind( process, + self._switchLookupFallback.bind( this, frame, + kvs.slice(i+1), key, dict, cb ) ), + asyncCB: cb + }); } } // value not found! 
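		// Illustration of the #switch fall-through this implements (standard
		// MediaWiki behaviour, values shown for orientation only):
		//   {{#switch: b | a | b | c = C | #default = D }}  ->  C
		//     ("b" matches a value-only case and falls through to the next
		//      entry that has a value assigned, "c = C")
		//   {{#switch: x | a | b | c = C | #default = D }}  ->  D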
if ( '#default' in dict ) { - cb( { tokens: dict['#default'] } ); - } else if ( kvs.length ) { + dict['#default'].get({ + type: 'tokens/x-mediawiki/expanded', + cb: function( res ) { cb ( { tokens: res } ); }, + asyncCB: cb + }); + /*} else if ( kvs.length ) { var lastKV = kvs[kvs.length - 1]; if ( lastKV && ! lastKV.k.length ) { cb ( { tokens: lastKV.v } ); } else { cb ( {} ); - } + }*/ } else { + // nothing found at all. cb ( {} ); } + } else { + // nothing found at all. + cb ( {} ); } }; @@ -105,7 +156,11 @@ ParserFunctions.prototype['pf_#switch'] = function ( token, frame, cb, args ) { var dict = args.dict(); if ( target && dict[target] !== undefined ) { this.env.dp( 'switch found: ', target, dict, ' res=', dict[target] ); - cb ( {tokens: dict[target] } ); + dict[target].get({ + type: 'tokens/x-mediawiki/expanded', + cb: function( res ) { cb ( { tokens: res } ); }, + asyncCB: cb + }); } else { this._switchLookupFallback( frame, args, target, dict, cb ); } @@ -117,15 +172,15 @@ ParserFunctions.prototype['pf_#ifeq'] = function ( token, frame, cb, args ) { cb( {} ); } else { var b = args[1].v; - b.to( 'text/plain/expanded', this._ifeq_worker.bind( this, cb, args ), cb ); + b.get( { cb: this._ifeq_worker.bind( this, cb, args ), asyncCB: cb } ); } }; ParserFunctions.prototype._ifeq_worker = function ( cb, args, b ) { if ( args[0].k.trim() === b.trim() ) { - cb( { tokens: ( args[2] && this._rejoinKV( args[2] ) || [] ) } ); + this.expandKV( args[2], cb ); } else { - cb( { tokens: ( args[3] && this._rejoinKV( args[3] ) || [] ) } ); + this.expandKV( args[3], cb ); } }; @@ -162,18 +217,18 @@ ParserFunctions.prototype['pf_#ifexpr'] = function ( token, frame, cb, args ) { } if ( res ) { - cb( { tokens: args[1] && this._rejoinKV( args[1] ) || [] } ); + this.expandKV( args[1], cb ); } else { - cb( { tokens: args[2] && this._rejoinKV( args[2] ) || [] } ); + this.expandKV( args[2], cb ); } }; ParserFunctions.prototype['pf_#iferror'] = function ( token, frame, cb, args ) { var target = args[0].k; if ( target.indexOf( 'class="error"' ) >= 0 ) { - cb( { tokens: args[1] && args[1].v || [] } ); + this.expandKV( args[1], cb ); } else { - cb( { tokens: args[2] && args[2].v || [ target ] } ); + this.expandKV( args[1], cb, target ); } }; @@ -203,51 +258,82 @@ ParserFunctions.prototype.pf_lcfirst = function ( token, frame, cb, args ) { cb( {} ); } }; -ParserFunctions.prototype.pf_padleft = function ( token, frame, cb, args ) { - var target = args[0].k, - pad; - if ( args[1] && args[1].v > 0) { - if ( args[2] && args[2].v ) { - pad = args[2].v; - } else { - pad = '0'; - } - var n = args[1].v; - while ( target.length < n ) { - target = pad + target; - } - cb( { tokens: [target] } ); - } else { - cb( {} ); +ParserFunctions.prototype.pf_padleft = function ( token, frame, cb, params ) { + var target = params[0].k; + if ( ! 
params[1] ) { + return cb( {} ); } + // expand parameters 1 and 2 + params.getSlice( { + type: 'text/x-mediawiki/expanded', + cb: function ( args ) { + if ( args[0].v > 0) { + var pad = '0'; + if ( args[1] && args[1].v !== '' ) { + pad = args[1].v; + } + var n = args[0].v; + while ( target.length < n ) { + target = pad + target; + } + cb( { tokens: [target] } ); + } else { + self.env.dp( 'padleft no pad width', args ); + cb( {} ); + } + } + }, + 1, 3); }; -ParserFunctions.prototype.pf_padright = function ( token, frame, cb, args ) { - var target = args[0].k; - if ( args[1] && args[1].v > 0) { - if ( args[2] && args[2].v ) { - pad = args[2].v; - } else { - pad = '0'; - } - var n = args[1].v; - while ( target.length < n ) { - target = target + pad; - } - cb( { tokens: [target] } ); - } else { - cb( {} ); + +ParserFunctions.prototype.pf_padright = function ( token, frame, cb, params ) { + var target = params[0].k; + if ( ! params[1] ) { + return cb( {} ); } + // expand parameters 1 and 2 + params.getSlice( { + type: 'text/x-mediawiki/expanded', + cb: function ( args ) { + if ( args[0].v > 0) { + if ( args[1] && args[1].v !== '' ) { + pad = args[1].v; + } else { + pad = '0'; + } + var n = args[0].v; + while ( target.length < n ) { + target = target + pad; + } + cb( { tokens: [target] } ); + } else { + cb( {} ); + } + } + }, + 1, 3 ); }; ParserFunctions.prototype['pf_#tag'] = function ( token, frame, cb, args ) { // TODO: handle things like {{#tag:nowiki|{{{input1|[[shouldnotbelink]]}}}}} // https://www.mediawiki.org/wiki/Future/Parser_development#Token_stream_transforms var target = args[0].k; - cb( { tokens: ( [ new TagTk( target ) ] - .concat( args[1].v, - [ new EndTagTk( target ) ] ) ) } ); + args[1].v.get({ + type: 'tokens/x-mediawiki/expanded', + cb: this.tag_worker.bind( this, target, cb ), + asyncCB: cb + }); }; +ParserFunctions.prototype.tag_worker = function( target, cb, content ) { + cb({ + tokens: [ new TagTk( target ) ] + .concat( content, + [ new EndTagTk( target ) ] ) + }); +}; + + // TODO: These are just quick wrappers for now, optimize! ParserFunctions.prototype.pf_currentyear = function ( token, frame, cb, args ) { cb( this._pf_time_tokens( 'Y', [], {} ) ); @@ -438,22 +524,30 @@ Date.replaceChars = { }; ParserFunctions.prototype.pf_localurl = function ( token, frame, cb, args ) { - var target = args[0].k; + var target = args[0].k, + env = this.env, + self = this; args = args.slice(1); - cb( { tokens: ( - '/' + - // FIXME! Figure out correct prefix to use - //this.env.wgScriptPath + - 'index' + - this.env.wgScriptExtension + '?title=' + - this.env.normalizeTitle( target ) + '&' + - args.map( - function( kv ) { - //console.warn( JSON.stringify( kv ) ); - return (kv.v !== '' && kv.k + '=' + kv.v ) || kv.k; - } - ).join('&') - ) } ); + async.map( + args, + function ( item, cb ) { + self.expandKV( item, function ( res ) { cb( null, res.tokens ); }, '', 'text/x-mediawiki/expanded' ); + }, + function ( err, expandedArgs ) { + if ( err ) { + console.trace(); + throw( err ); + } + cb({ tokens: [ '/' + + // FIXME! Figure out correct prefix to use + //this.env.wgScriptPath + + 'index' + + env.wgScriptExtension + '?title=' + + env.normalizeTitle( target ) + '&' + + expandedArgs.join('&') ] + }); + } + ); }; @@ -503,7 +597,7 @@ ParserFunctions.prototype.pf_urlencode = function ( token, frame, cb, args ) { // http://www.mediawiki.org/wiki/Wikitext_parser/Environment. // There might be better solutions for some of these. 
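// The recurring pattern in the rewritten parser functions above: argument
// values are lazy and are expanded on demand. A minimal sketch of that use,
// with the get() contract inferred from the call sites in this diff rather
// than from a documented API (expandValue is an illustrative name):
function expandValue( kv, cb ) {
	kv.v.get( {
		type: 'tokens/x-mediawiki/expanded', // requested result format
		cb: function ( expanded ) {          // called with the expanded tokens
			cb( { tokens: expanded } );
		},
		asyncCB: cb // lets the value signal { async: true } before expansion completes
	} );
}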
ParserFunctions.prototype['pf_#ifexist'] = function ( token, frame, cb, args ) { - cb( { tokens: ( args[1] && args[1].v ) || [] } ); + this.expandKV( args[1], cb ); }; ParserFunctions.prototype.pf_pagesize = function ( token, frame, cb, args ) { cb( { tokens: [ '100' ] } ); diff --git a/modules/parser/ext.core.PostExpandParagraphHandler.js b/modules/parser/ext.core.PostExpandParagraphHandler.js index d77714edab..1901b4e4ec 100644 --- a/modules/parser/ext.core.PostExpandParagraphHandler.js +++ b/modules/parser/ext.core.PostExpandParagraphHandler.js @@ -16,8 +16,8 @@ function PostExpandParagraphHandler ( dispatcher ) { } // constants -PostExpandParagraphHandler.prototype.newlineRank = 2.2; -PostExpandParagraphHandler.prototype.anyRank = 2.201; // Just after regular quote and newline +PostExpandParagraphHandler.prototype.newlineRank = 2.5; +PostExpandParagraphHandler.prototype.anyRank = 2.501; // Just after regular quote and newline // Register this transformer with the TokenTransformer @@ -43,9 +43,6 @@ PostExpandParagraphHandler.prototype.reset = function ( token, frame, cb ) { PostExpandParagraphHandler.prototype._finish = function ( ) { var tokens = this.tokens; this.tokens = []; - for ( var i = 0, l = tokens.length; i < l; i++ ) { - tokens[ i ].rank = this.anyRank; - } // remove 'any' registration this.dispatcher.removeTransform( this.anyRank, 'any' ); this.newLines = 0; diff --git a/modules/parser/ext.core.QuoteTransformer.js b/modules/parser/ext.core.QuoteTransformer.js index a81feebc98..8f5761fa8a 100644 --- a/modules/parser/ext.core.QuoteTransformer.js +++ b/modules/parser/ext.core.QuoteTransformer.js @@ -33,14 +33,9 @@ QuoteTransformer.prototype.reset = function ( ) { // Register this transformer with the TokenTransformer QuoteTransformer.prototype.register = function ( dispatcher ) { this.dispatcher = dispatcher; - // Register for NEWLINE and QUOTE tag tokens - dispatcher.addTransform( this.onNewLine.bind(this), - this.quoteAndNewlineRank, 'newline' ); + // Register for QUOTE tag tokens dispatcher.addTransform( this.onQuote.bind(this), this.quoteAndNewlineRank, 'tag', 'mw-quote' ); - // Treat end-of-input just the same as a newline - dispatcher.addTransform( this.onNewLine.bind(this), - this.quoteAndNewlineRank, 'end' ); }; // Make a copy of the token context @@ -69,6 +64,11 @@ QuoteTransformer.prototype.onQuote = function ( token, frame, prevToken ) { if ( ! 
this.isActive ) { + this.dispatcher.addTransform( this.onNewLine.bind(this), + this.quoteAndNewlineRank, 'newline' ); + // Treat end-of-input just the same as a newline + this.dispatcher.addTransform( this.onNewLine.bind(this), + this.quoteAndNewlineRank, 'end' ); // register for any token if not yet active this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'any' ); this.isActive = true; @@ -137,7 +137,7 @@ QuoteTransformer.prototype.onNewLine = function ( token, frame, prevToken ) { } - token.rank = this.quoteAndNewlineRank; + //token.rank = this.quoteAndNewlineRank; //console.warn('chunks: ' + JSON.stringify( this.chunks, null, 2 ) ); @@ -203,7 +203,9 @@ QuoteTransformer.prototype.onNewLine = function ( token, frame, prevToken ) { // prepare for next line this.reset(); - // remove 'any' registration + // remove 'end', 'newline' and 'any' registrations + this.dispatcher.removeTransform( this.quoteAndNewlineRank, 'end' ); + this.dispatcher.removeTransform( this.quoteAndNewlineRank, 'newline' ); this.dispatcher.removeTransform( this.anyRank, 'any' ); return res; diff --git a/modules/parser/ext.core.Sanitizer.js b/modules/parser/ext.core.Sanitizer.js index 5b02bd967a..f9fd257205 100644 --- a/modules/parser/ext.core.Sanitizer.js +++ b/modules/parser/ext.core.Sanitizer.js @@ -115,7 +115,7 @@ Sanitizer.prototype.onAny = function ( token ) { } attribs[i] = new KV( k, v ); } - //console.warn(JSON.stringify([attribs, token], null, 2)); + //console.warn( 'sanitizer: ' + JSON.stringify([attribs, token], null, 2)); newToken.attribs = attribs; token = newToken; } diff --git a/modules/parser/ext.core.TemplateHandler.js b/modules/parser/ext.core.TemplateHandler.js index c5f4c80acc..092f284d54 100644 --- a/modules/parser/ext.core.TemplateHandler.js +++ b/modules/parser/ext.core.TemplateHandler.js @@ -52,12 +52,6 @@ TemplateHandler.prototype.onTemplate = function ( token, frame, cb ) { //console.warn('onTemplate! ' + JSON.stringify( token, null, 2 ) + // ' args: ' + JSON.stringify( this.manager.args )); - - // create a new temporary frame for argument and title expansions - // XXX: only expand keys, and leave value expansion to template parameter - // replacement or parser functions as needed! - - // expand argument keys, with callback set to next processing step // XXX: would likely be faster to do this in a tight loop here var atm = new AttributeTransformManager( @@ -77,7 +71,7 @@ TemplateHandler.prototype._nameArgs = function ( attribs ) { for ( var i = 0, l = attribs.length; i < l; i++ ) { // FIXME: Also check for whitespace-only named args! if ( ! 
attribs[i].k.length ) { - out.push( {k: [ n.toString() ], v: attribs[i].v } ); + out.push( new KV( n.toString(), attribs[i].v ) ); n++; } else { out.push( attribs[i] ); @@ -129,7 +123,7 @@ TemplateHandler.prototype._expandTemplate = function ( token, frame, cb, attribs // 'unnamedArgs', tplExpandData.origToken.attribs, // 'funcArg:', funcArg // ); - //this.manager.env.dp( 'entering prefix', funcArg, tplExpandData.expandedArgs ); + this.manager.env.dp( 'entering prefix', target, token ); this.parserFunctions[ 'pf_' + prefix ] ( token, this.manager.frame, cb, pfAttribs ); return; @@ -156,7 +150,8 @@ TemplateHandler.prototype._expandTemplate = function ( token, frame, cb, attribs templateName, new EndTagTk( 'a' ) ]; - cb( { tokens: res, allTokensProcessed: true } ); + res.rank = this.manager.phaseEndRank; + cb( { tokens: res } ); return; } @@ -192,7 +187,7 @@ TemplateHandler.prototype._processTemplateAndTitle = function( token, frame, cb, pipeline.addListener( 'chunk', this._onChunk.bind ( this, cb ) ); pipeline.addListener( 'end', this._onEnd.bind ( this, cb ) ); // Feed the pipeline. XXX: Support different formats. - this.manager.env.dp( 'TemplateHandler._processTemplateAndTitle', name, src, attribs ); + this.manager.env.dp( 'TemplateHandler._processTemplateAndTitle', name, attribs ); pipeline.process ( src, name ); }; @@ -201,7 +196,7 @@ TemplateHandler.prototype._processTemplateAndTitle = function( token, frame, cb, */ TemplateHandler.prototype._onChunk = function( cb, chunk ) { // We encapsulate the output by default, so collect tokens here. - this.manager.env.stripEOFTkfromTokens( chunk ); + chunk = this.manager.env.stripEOFTkfromTokens( chunk ); this.manager.env.dp( 'TemplateHandler._onChunk', chunk ); cb( { tokens: chunk, async: true } ); }; @@ -234,7 +229,7 @@ TemplateHandler.prototype._fetchTemplateAndTitle = function ( title, parentCB, c this.manager.env.dp( 'Note: trying to fetch ', title ); // Start a new request if none is outstanding - this.manager.env.dp( 'requestQueue: ', this.manager.env.requestQueue ); + //this.manager.env.dp( 'requestQueue: ', this.manager.env.requestQueue ); if ( this.manager.env.requestQueue[title] === undefined ) { this.manager.env.tp( 'Note: Starting new request for ' + title ); this.manager.env.requestQueue[title] = new TemplateRequest( this.manager, title ); @@ -269,24 +264,29 @@ TemplateHandler.prototype.onTemplateArg = function ( token, frame, cb ) { TemplateHandler.prototype._returnArgAttributes = function ( token, cb, frame, attributes ) { //console.warn( '_returnArgAttributes: ' + JSON.stringify( attributes )); - var argName = this.manager.env.tokensToString( attributes[0].v ).trim(), + var argName = this.manager.env.tokensToString( attributes[0].k ).trim(), res, dict = this.manager.frame.args.named(); - this.manager.env.dp( 'args', argName, dict ); + this.manager.env.dp( 'args', argName /*, dict*/ ); if ( argName in dict ) { // return tokens for argument //console.warn( 'templateArg found: ' + argName + // ' vs. ' + JSON.stringify( this.manager.args ) ); res = dict[argName]; + this.manager.env.dp( 'arg res:', res ); if ( res.constructor === String ) { cb( { tokens: [res] } ); } else { - dict[argName].to('tokens/x-mediawiki/expanded', function(chunk) { cb ( { tokens: chunk } ); }); + dict[argName].get({ + type: 'tokens/x-mediawiki/expanded', + cb: function( res ) { cb ( { tokens: res } ); }, + asyncCB: cb + }); } return; } else { - this.manager.env.dp( 'templateArg not found: ', argName, - ' vs. 
', dict ); + this.manager.env.dp( 'templateArg not found: ', argName + /*' vs. ', dict */ ); if ( attributes.length > 1 ) { res = attributes[1].v; } else { @@ -334,7 +334,7 @@ function TemplateRequest ( manager, title ) { // Inherit from EventEmitter TemplateRequest.prototype = new events.EventEmitter(); -TemplateHandler.prototype.constructor = TemplateRequest; +TemplateRequest.prototype.constructor = TemplateRequest; TemplateRequest.prototype._handler = function (error, response, body) { //console.warn( 'response for ' + title + ' :' + body + ':' ); @@ -353,7 +353,7 @@ TemplateRequest.prototype._handler = function (error, response, body) { } else if(response.statusCode == 200) { var src = '', data, - normalizedTitle; + normalizedTitle; try { //console.warn( 'body: ' + body ); data = JSON.parse( body ); @@ -375,8 +375,29 @@ TemplateRequest.prototype._handler = function (error, response, body) { console.warn( 'Did not find page revisions in the returned body:' + body ); src = ''; } + + // check for #REDIRECT + var redirMatch = src.match( /[\r\n\s]*#\s*redirect\s\[\[([^\]]+)\]\]/i ) + if ( redirMatch ) { + var title = redirMatch[1]; + var url = this.manager.env.wgScript + '/api' + + this.manager.env.wgScriptExtension + + '?' + + qs.stringify( { + format: 'json', + action: 'query', + prop: 'revisions', + rvprop: 'content', + titles: title + } ); + //'?format=json&action=query&prop=revisions&rvprop=content&titles=' + title; + this.requestOptions.url = url; + request( this.requestOptions, this._handler.bind(this) ); + return; + } + //console.warn( 'Page ' + title + ': got ' + src ); - this.manager.env.tp( 'Retrieved ' + this.title ); + this.manager.env.tp( 'Retrieved ' + this.title, src ); // Add the source to the cache this.manager.env.pageCache[this.title] = src; @@ -387,7 +408,7 @@ TemplateRequest.prototype._handler = function (error, response, body) { // var listeners = this.listeners( 'src' ); var processSome = function () { - // XXX: experiment a bit with the number of callback per + // XXX: experiment a bit with the number of callbacks per // iteration! var maxIters = Math.min(1, listeners.length); for ( var it = 0; it < maxIters; it++ ) { @@ -408,9 +429,9 @@ TemplateRequest.prototype._handler = function (error, response, body) { // XXX: handle other status codes // Remove self from request queue - this.manager.env.dp( 'trying to remove ', this.title, ' from requestQueue' ); + //this.manager.env.dp( 'trying to remove ', this.title, ' from requestQueue' ); delete this.manager.env.requestQueue[this.title]; - this.manager.env.dp( 'after deletion:', this.manager.env.requestQueue ); + //this.manager.env.dp( 'after deletion:', this.manager.env.requestQueue ); }; /* diff --git a/modules/parser/ext.util.TokenCollector.js b/modules/parser/ext.util.TokenCollector.js index 47525b94a4..d42081c371 100644 --- a/modules/parser/ext.util.TokenCollector.js +++ b/modules/parser/ext.util.TokenCollector.js @@ -54,7 +54,13 @@ TokenCollector.prototype._anyDelta = 0.00001; * XXX: Adjust to sync phase callback when that is modified! 
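 *
 * Collection life cycle as used by the Cite handler above (illustrative):
 *   <ref> start tag              -> collection starts; 'any' and 'end'
 *                                   handlers are registered
 *   ...content tokens...         -> buffered by _onAnyToken
 *   </ref> end tag or EOFTk      -> transformation( collectedTokens ) replaces
 *                                   the whole run (EOFTk only when toEnd is set)
 *   self-closing <ref/> while
 *   not collecting               -> passed through unchanged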
*/ TokenCollector.prototype._onDelimiterToken = function ( token, frame, cb ) { - if ( this.isActive ) { + if ( token.constructor === SelfclosingTagTk && !this.isActive ) { + this.manager.env.dp( 'skipping collection on ', token ); + // just ignore it + return { tokens: [ token ] }; //this.transformation( [token, token] ); + } else if ( this.isActive && + ( token.constructor === EndTagTk || token.constructor === EOFTk ) ) { + this.manager.env.dp( 'finishing collection on ', token ); var res; // finish processing this.tokens.push ( token ); @@ -63,7 +69,7 @@ TokenCollector.prototype._onDelimiterToken = function ( token, frame, cb ) { this.manager.removeTransform( this.rank, 'end' ); if ( token.constructor !== EOFTk || this.toEnd ) { // end token - res = this.transformation ( this.tokens, this.cb, this.manager ); + res = this.transformation ( this.tokens ); this.tokens = []; // Transformation can be either sync or async, but receives all collected // tokens instead of a single token. @@ -76,20 +82,27 @@ TokenCollector.prototype._onDelimiterToken = function ( token, frame, cb ) { return { tokens: res }; } } else if ( token.constructor !== EOFTk ) { - this.manager.env.dp( 'starting collection on ', token ); - // start collection + + if ( this.isActive ) { + this.manager.env.dp( 'already active: ', token ); + } else { + // start collection + this.manager.env.dp( 'starting collection on ', token ); + this.manager.addTransform( this._onAnyToken.bind ( this ), + this.rank + this._anyDelta, 'any' ); + this.manager.addTransform( this._onDelimiterToken.bind( this ), + this.rank, 'end' ); + this.isActive = true; + } this.tokens.push ( token ); - this.manager.addTransform( this._onAnyToken.bind ( this ), - this.rank + this._anyDelta, 'any' ); - this.manager.addTransform( this._onDelimiterToken.bind( this ), - this.rank, 'end' ); // Did not encounter a matching end token before the end, and are not // supposed to collect to the end. So just return the tokens verbatim. - this.isActive = true; return { }; } else { // pass through end token - return { token: token }; + this.tokens = []; + this.isActive = false; + return { tokens: [ token ] }; } }; diff --git a/modules/parser/mediawiki.HTML5TreeBuilder.node.js b/modules/parser/mediawiki.HTML5TreeBuilder.node.js index d02832de19..d1334adefa 100644 --- a/modules/parser/mediawiki.HTML5TreeBuilder.node.js +++ b/modules/parser/mediawiki.HTML5TreeBuilder.node.js @@ -72,16 +72,17 @@ FauxHTML5.TreeBuilder.prototype._att = function (maybeAttribs) { // Adapt the token format to internal HTML tree builder format, call the actual // html tree builder by emitting the token. FauxHTML5.TreeBuilder.prototype.processToken = function (token) { + var attribs = token.attribs || []; if ( token.dataAttribs ) { if ( ! 
token.attribs ) { token.attribs = []; } - token.attribs.push( + attribs = attribs.concat([ { // Mediawiki-specific round-trip / non-semantic information k: 'data-mw', v: JSON.stringify( token.dataAttribs ) - } ); + } ] ); } switch( token.constructor ) { @@ -93,24 +94,24 @@ FauxHTML5.TreeBuilder.prototype.processToken = function (token) { case TagTk: this.emit('token', {type: 'StartTag', name: token.name, - data: this._att(token.attribs)}); + data: this._att(attribs)}); break; case SelfclosingTagTk: this.emit('token', {type: 'StartTag', name: token.name, - data: this._att(token.attribs)}); + data: this._att(attribs)}); if ( HTML5.VOID_ELEMENTS.indexOf( token.name.toLowerCase() ) < 0 ) { // VOID_ELEMENTS are automagically treated as self-closing by // the tree builder this.emit('token', {type: 'EndTag', name: token.name, - data: this._att(token.attribs)}); + data: this._att(attribs)}); } break; case EndTagTk: this.emit('token', {type: 'EndTag', name: token.name, - data: this._att(token.attribs)}); + data: this._att(attribs)}); break; case CommentTk: this.emit('token', {type: 'Comment', diff --git a/modules/parser/mediawiki.TokenTransformManager.js b/modules/parser/mediawiki.TokenTransformManager.js index 6f48df95ad..66bafad9bf 100644 --- a/modules/parser/mediawiki.TokenTransformManager.js +++ b/modules/parser/mediawiki.TokenTransformManager.js @@ -16,7 +16,9 @@ * @author Gabriel Wicke */ -var events = require('events'); +var events = require('events'), + LRU = require("lru-cache"), + jshashes = require('jshashes'); /** * Base class for token transform managers @@ -127,7 +129,7 @@ TokenTransformManager.prototype.removeTransform = function ( rank, type, name ) TokenTransformManager.prototype.setTokensRank = function ( tokens, rank ) { for ( var i = 0, l = tokens.length; i < l; i++ ) { - tokens[i] = this.env.setTokenRank( tokens[i], rank ); + tokens[i] = this.env.setTokenRank( rank, tokens[i] ); } }; @@ -141,7 +143,8 @@ TokenTransformManager.prototype._cmpTransformations = function ( a, b ) { /** * Get all transforms for a given token */ -TokenTransformManager.prototype._getTransforms = function ( token ) { +TokenTransformManager.prototype._getTransforms = function ( token, minRank ) { + var ts; switch ( token.constructor ) { case String: @@ -173,7 +176,14 @@ TokenTransformManager.prototype._getTransforms = function ( token ) { ts = ts.concat( this.transformers.any ); ts.sort( this._cmpTransformations ); } - return ts; + if ( minRank !== undefined ) { + // skip transforms <= minRank + var i = 0; + for ( l = ts.length; i < l && ts[i].rank <= minRank; i++ ) { } + return ( i && ts.slice( i ) ) || ts; + } else { + return ts; + } }; /******************** Async token transforms: Phase 2 **********************/ @@ -184,15 +194,9 @@ TokenTransformManager.prototype._getTransforms = function ( token ) { * return protocol for individual transforms: * { tokens: [tokens], async: true }: async expansion -> outstanding++ in parent * { tokens: [tokens] }: fully expanded, tokens will be reprocessed - * { token: token }: single-token return * * @class * @constructor - * @param {Function} childFactory: A function that can be used to create a - * new, nested transform manager: - * nestedAsyncTokenTransformManager = manager.newChildPipeline( inputType, args ); - * @param {Object} args, the argument map for templates - * @param {Object} env, the environment. 
*/ function AsyncTokenTransformManager ( env, isInclude, pipeFactory, phaseEndRank, attributeType ) { this.env = env; @@ -200,7 +204,7 @@ function AsyncTokenTransformManager ( env, isInclude, pipeFactory, phaseEndRank, this.pipeFactory = pipeFactory; this.phaseEndRank = phaseEndRank; this.attributeType = attributeType; - this.setFrame( null, null, {} ); + this.setFrame( null, null, [] ); this._construct(); } @@ -214,14 +218,13 @@ AsyncTokenTransformManager.prototype.constructor = AsyncTokenTransformManager; * TokenTransformManager, but keep registrations untouched. * * @method - * @param {Object} args, template arguments - * @param {Object} The environment. */ AsyncTokenTransformManager.prototype.setFrame = function ( parentFrame, title, args ) { + this.env.dp( 'AsyncTokenTransformManager.setFrame', title, args ); // First piggy-back some reset action - this.tailAccumulator = undefined; + this.tailAccumulator = null; // initial top-level callback, emits chunks - this.tokenCB = this._returnTokens.bind( this ); + this.tokenCB = this.emitChunk.bind( this ); // now actually set up the frame if (parentFrame) { @@ -236,10 +239,23 @@ AsyncTokenTransformManager.prototype.setFrame = function ( parentFrame, title, a } }; +/** + * Callback for async returns from head of TokenAccumulator chain + */ +AsyncTokenTransformManager.prototype.emitChunk = function( ret ) { + this.env.dp( 'emitChunk', ret ); + this.emit( 'chunk', ret.tokens ); + if ( ! ret.async ) { + this.emit('end'); + } else { + // allow accumulators to go direct + return this.emitChunk.bind( this ); + } +}; /** - * Simplified wrapper that processes all tokens passed in + * Simple wrapper that processes all tokens passed in */ AsyncTokenTransformManager.prototype.process = function ( tokens ) { if ( ! $.isArray ( tokens ) ) { @@ -259,194 +275,24 @@ AsyncTokenTransformManager.prototype.process = function ( tokens ) { AsyncTokenTransformManager.prototype.onChunk = function ( tokens ) { // Set top-level callback to next transform phase var res = this.transformTokens ( tokens, this.tokenCB ); - this.env.dp( 'AsyncTokenTransformManager onChunk res=', res ); + this.env.dp( 'AsyncTokenTransformManager onChunk', res.async? 'async' : 'sync', res.tokens ); + // Emit or append the returned tokens if ( ! this.tailAccumulator ) { + this.env.dp( 'emitting' ); this.emit( 'chunk', res.tokens ); } else { + this.env.dp( 'appending to tail' ); this.tailAccumulator.append( res.tokens ); } - + + // Update the tail of the current accumulator chain if ( res.async ) { this.tailAccumulator = res.async; this.tokenCB = res.async.getParentCB ( 'sibling' ); } }; -/** - * Run transformations from phases 0 and 1. This includes starting and - * managing asynchronous transformations. - * - */ -AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parentCB ) { - - //console.warn('AsyncTokenTransformManager.transformTokens: ' + JSON.stringify(tokens) ); - - var res, - localAccum = [], - activeAccum = localAccum, - transforming = true, - childAccum = null, - accum = new TokenAccumulator( this, parentCB ), - token, ts, transformer, aborted, - maybeSyncReturn = function ( asyncCB, ret ) { - if ( transforming ) { - this.env.dp( 'maybeSyncReturn transforming', ret ); - // transformTokens is ongoing - if ( false && ret.tokens && ! ret.async && ret.allTokensProcessed && ! 
childAccum ) { - localAccum.push.apply(localAccum, res.tokens ); - } else if ( ret.tokens ) { - if ( res.tokens ) { - res.tokens = res.tokens.concat( ret.tokens ); - res.async = ret.async; - } else { - res = ret; - } - } else { - if ( ! res.tokens ) { - res = ret; - } else { - res.async = ret.async; - } - } - } else { - this.env.dp( 'maybeSyncReturn async', ret ); - asyncCB( ret ); - } - }, - cb = maybeSyncReturn.bind( this, accum.getParentCB( 'child' ) ), - minRank; - - for ( var i = 0, l = tokens.length; i < l; i++ ) { - token = tokens[i]; - minRank = token.rank || 0; - aborted = false; - ts = this._getTransforms( token ); - - if ( ts.length ) { - - //this.env.tp( 'async trans' ); - this.env.dp( token, ts ); - for (var j = 0, lts = ts.length; j < lts; j++ ) { - res = { }; - transformer = ts[j]; - if ( minRank && transformer.rank <= minRank ) { - // skip transformation, was already applied. - //console.warn( 'skipping transform'); - res.token = token; - continue; - } - // Transform the token. - transformer.transform( token, this.frame, cb ); - - // Check the result, which is changed using the - // maybeSyncReturn callback - if ( res.token === undefined ) { - if ( res.tokens && res.tokens.length === 1 && - token === res.tokens[0] ) - { - res.token = token; - } else { - aborted = true; - break; - } - } - minRank = transformer.rank; - token = res.token; - } - } else { - res = { token: token }; - } - - if ( ! aborted ) { - res.token = this.env.setTokenRank( res.token, this.phaseEndRank ); - // token is done. - // push to accumulator - activeAccum.push( res.token ); - continue; - } else if( res.tokens ) { - // Splice in the returned tokens (while replacing the original - // token), and process them next. - //if ( ! res.allTokensProcessed ) { - [].splice.apply( tokens, [i, 1].concat(res.tokens) ); - l = tokens.length; - i--; // continue at first inserted token - //} else { - // skip fully processed tokens - //} - } - - if ( res.async ) { - this.env.dp( 'res.async' ); - // The child now switched to activeAccum, we have to create a new - // accumulator for the next potential child. - activeAccum = accum; - childAccum = accum; - accum = new TokenAccumulator( this, childAccum.getParentCB( 'sibling' ) ); - cb = maybeSyncReturn.bind( this, accum.getParentCB( 'child' ) ); - } - } - transforming = false; - - // Return finished tokens directly to caller, and indicate if further - // async actions are outstanding. The caller needs to point a sibling to - // the returned accumulator, or call .siblingDone() to mark the end of a - // chain. - return { tokens: localAccum, async: childAccum }; -}; - -/** - * Top-level callback for tokens which are now free to be emitted iff they are - * indeed fully processed for sync01 and async12. If there were asynchronous - * expansions, then only the first TokenAccumulator has its callback set to - * this method. An exhausted TokenAccumulator passes its callback on to its - * siblings until the last accumulator is reached, so that the head - * accumulator will always call this method directly. - * - * @method - * @param {Object} tokens, async, allTokensProcessed - * @returns {Mixed} new parent callback for caller or falsy value. 
- */ -AsyncTokenTransformManager.prototype._returnTokens = function ( ret ) { - //tokens = this._transformPhase2( this.frame, tokens, this.parentCB ); - - this.env.dp( 'AsyncTokenTransformManager._returnTokens, emitting chunk: ', - ret ); - - if( !ret.allTokensProcessed ) { - var res = this.transformTokens( ret.tokens, this._returnTokens.bind(this) ); - this.emit( 'chunk', res.tokens ); - if ( res.async ) { - // XXX: this looks fishy - if ( ! this.tailAccumulator ) { - this.tailAccumulator = res.async; - this.tokenCB = res.async.getParentCB ( 'sibling' ); - } - if ( ret.async ) { - // return sibling callback - return this.tokenCB; - } else { - // signal done-ness to last accum - res.async.siblingDone(); - } - } else if ( !ret.async ) { - this.emit( 'end' ); - // and reset internal state. - //this._reset(); - } - } else { - this.emit( 'chunk', ret.tokens ); - - if ( ! ret.async ) { - //console.trace(); - // signal our done-ness to consumers. - this.emit( 'end' ); - // and reset internal state. - //this._reset(); - } - } -}; - /** * Callback for the end event emitted from the tokenizer. * Either signals the end of input to the tail of an ongoing asynchronous @@ -456,18 +302,315 @@ AsyncTokenTransformManager.prototype._returnTokens = function ( ret ) { AsyncTokenTransformManager.prototype.onEndEvent = function () { if ( this.tailAccumulator ) { this.env.dp( 'AsyncTokenTransformManager.onEndEvent: calling siblingDone', - this.frame ); + this.frame.title ); this.tailAccumulator.siblingDone(); } else { // nothing was asynchronous, so we'll have to emit end here. this.env.dp( 'AsyncTokenTransformManager.onEndEvent: synchronous done', - this.frame ); + this.frame.title ); this.emit('end'); //this._reset(); } }; +/** + * Utility method to set up a new TokenAccumulator with the right callbacks. + */ +AsyncTokenTransformManager.prototype._makeNextAccum = function( cb, state ) { + var res = {}; + res.accum = new TokenAccumulator( this, cb ); + var _cbs = { parent: res.accum.getParentCB( 'child' ) }; + res.cb = this.maybeSyncReturn.bind( this, state, _cbs ); + _cbs.self = res.cb; + return res; +}; + +// Debug counter, provides an UID for transformTokens calls so that callbacks +// associated with it can be identified in debugging output as c-XXX. +AsyncTokenTransformManager.prototype._counter = 0; + +/** + * Run asynchronous transformations. This is the big workhorse where + * templates, images, links and other async expansions (see the transform + * recipe mediawiki.parser.js) are processed. + * + * @param tokens {Array}: Chunk of tokens, potentially with rank and other + * meta information associated with it. + * @param parentCB {Function}: callback for asynchronous results + * @returns {Object}: { tokens: [], async: falsy or the tail TokenAccumulator } + * The returned chunk is fully expanded for this phase, and the rank set + * to reflect this. 
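To make the documented return contract concrete, this is roughly how onChunk above consumes it; a paraphrase for illustration only, not code added by this patch:

// Fully processed tokens are emitted directly (or appended to the current
// tail accumulator), and a truthy res.async becomes the new tail, whose
// 'sibling' callback receives any later asynchronous results.
function consumeChunk( manager, tokens ) {
	var res = manager.transformTokens( tokens, manager.tokenCB );
	if ( !manager.tailAccumulator ) {
		manager.emit( 'chunk', res.tokens );
	} else {
		manager.tailAccumulator.append( res.tokens );
	}
	if ( res.async ) {
		manager.tailAccumulator = res.async;
		manager.tokenCB = res.async.getParentCB( 'sibling' );
	}
}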
+ */ +AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parentCB ) { + + //console.warn('AsyncTokenTransformManager.transformTokens: ' + JSON.stringify(tokens) ); + + var inputRank = tokens.rank || 0, + localAccum = [], // a local accum for synchronously returned fully processed tokens + activeAccum = localAccum, // start out collecting tokens in localAccum + // until the first async transform is hit + workStack = [], // stack of stacks (reversed chunks) of tokens returned + // from transforms to process before consuming the next + // input token + token, // currently processed token + s = { // Shared state accessible to synchronous transforms in this.maybeSyncReturn + transforming: true, + res: {}, + // debug id for this expansion + c: 'c-' + AsyncTokenTransformManager.prototype._counter++ + }, + minRank; + + // make localAccum compatible with getParentCB('sibling') + localAccum.getParentCB = function() { return parentCB }; + var nextAccum = this._makeNextAccum( parentCB, s ); + + var i = 0, + l = tokens.length; + while ( i < l || workStack.length ) { + if ( workStack.length ) { + var curChunk = workStack[workStack.length - 1]; + minRank = curChunk.rank || inputRank; + token = curChunk.pop(); + if ( !curChunk.length ) { + + // activate nextActiveAccum after consuming the chunk + if ( curChunk.nextActiveAccum ) { + if ( activeAccum !== curChunk.oldActiveAccum ) { + // update the callback of the next active accum + curChunk.nextActiveAccum.setParentCB( activeAccum.getParentCB('sibling') ); + } + activeAccum = curChunk.nextActiveAccum; + // create new accum and cb for transforms + nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s ); + } + + // remove empty chunk from workstack + workStack.pop(); + } + } else { + token = tokens[i]; + i++; + minRank = inputRank; + if ( token.constructor === Array ) { + if ( ! token.length ) { + // skip it + } else if ( ! token.rank || token.rank < this.phaseEndRank ) { + workStack.push( token ); + } else { + // don't process the array in this phase. + activeAccum.push( token ); + } + continue; + } else if ( token.constructor === ParserValue ) { + // Parser functions etc that run before full attribute + // expansion are responsible for the full expansion of + // returned attributes in their respective environments. + throw( 'Unexpected ParserValue in AsyncTokenTransformManager.transformTokens:' + + JSON.stringify( token ) ); + } + } + + var ts = this._getTransforms( token, minRank ); + + //this.env.dp( 'async token:', s.c, token, minRank, ts ); + + if ( ! ts.length ) { + // nothing to do for this token + activeAccum.push( token ); + } else { + //this.env.tp( 'async trans' ); + for (var j = 0, lts = ts.length; j < lts; j++ ) { + var transformer = ts[j]; + s.res = { }; + // Transform the token. + transformer.transform( token, this.frame, nextAccum.cb ); + + var resTokens = s.res.tokens; + + //this.env.dp( 's.res:', s.c, s.res ); + + // Check the result, which is changed using the + // maybeSyncReturn callback + if ( resTokens && resTokens.length ) { + if ( resTokens.length === 1 ) { + if ( token === resTokens[0] && ! resTokens.rank ) { + // token not modified, continue with + // transforms. + token = resTokens[0]; + continue; + } else if ( + resTokens.rank === this.phaseEndRank || + ( resTokens[0].constructor === String && + ! this.transformers.text.length ) ) + { + // Fast path for text token, and nothing to do for it + // Abort processing, but treat token as done. 
+ token = resTokens[0]; + break; + } + } + + // token(s) were potentially modified + if ( ! resTokens.rank || resTokens.rank < this.phaseEndRank ) { + // There might still be something to do for these + // tokens. Prepare them for the workStack. + var revTokens = resTokens.slice(); + revTokens.reverse(); + // Don't apply earlier transforms to results of a + // transformer to avoid loops and keep the + // execution model sane. + revTokens.rank = resTokens.rank || transformer.rank; + //revTokens.rank = Math.max( resTokens.rank || 0, transformer.rank ); + revTokens.oldActiveAccum = activeAccum; + workStack.push( revTokens ); + if ( s.res.async ) { + revTokens.nextActiveAccum = nextAccum.accum; + } + // create new accum and cb for transforms + //activeAccum = nextAccum.accum; + nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s ); + // don't trigger activeAccum switch / _makeNextAccum call below + s.res.async = false; + this.env.dp( 'workStack', s.c, revTokens.rank, workStack ); + } + } + // Abort processing for this token + token = null; + break; + } + + if ( token !== null ) { + // token is done. + // push to accumulator + //console.warn( 'pushing ' + token ); + activeAccum.push( token ); + } + + if ( s.res.async ) { + this.env.dp( 'res.async, creating new TokenAccumulator', s.c ); + // The child now switched to activeAccum, we have to create a new + // accumulator for the next potential child. + activeAccum = nextAccum.accum; + nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s ); + } + } + } + // we are no longer transforming, maybeSyncReturn needs to follow the + // async code path + s.transforming = false; + + // All tokens in localAccum are fully processed + localAccum.rank = this.phaseEndRank; + + this.env.dp( 'localAccum', + activeAccum !== localAccum ? 'async' : 'sync', + s.c, + localAccum ); + + // Return finished tokens directly to caller, and indicate if further + // async actions are outstanding. The caller needs to point a sibling to + // the returned accumulator, or call .siblingDone() to mark the end of a + // chain. + var retAccum = activeAccum !== localAccum ? activeAccum : null; + return { tokens: localAccum, async: retAccum }; +}; + +/** + * Callback for async transforms + * + * Converts direct callbacks into a synchronous return by collecting the + * results in s.res. Re-start transformTokens for any async returns, and calls + * the provided asyncCB (TokenAccumulator._returnTokens normally). + */ +AsyncTokenTransformManager.prototype.maybeSyncReturn = function ( s, cbs, ret ) { + if ( s.transforming ) { + if ( ! ret ) { + // support empty callbacks, for simple async signalling + s.res.async = true; + return; + } + + // transformTokens is still ongoing, handle as sync return by + // collecting the results in s.res + this.env.dp( 'maybeSyncReturn transforming', s.c, ret ); + if ( ret.tokens ) { + if ( ret.tokens.constructor !== Array ) { + ret.tokens = [ ret.tokens ]; + } + if ( s.res.tokens ) { + var oldRank = s.res.tokens.rank; + s.res.tokens = s.res.tokens.concat( ret.tokens ); + if ( oldRank && ret.tokens.rank ) { + // Conservatively set the overall rank to the minimum. + // This assumes that multi-pass expansion for some tokens + // is safe. We might want to revisit that later. 
+ Math.min( oldRank, ret.tokens.rank ); + } + s.res.async = ret.async; + } else { + s.res = ret; + } + } else if ( ret.constructor === Array ) { + console.trace(); + } + s.res.async = ret.async; + //console.trace(); + } else if ( ret !== undefined ) { + // Since the original transformTokens call is already done, we have to + // re-start application of any remaining transforms here. + this.env.dp( 'maybeSyncReturn async', s.c, ret ); + var asyncCB = cbs.parent, + tokens = ret.tokens; + if ( tokens && tokens.length && + ( ! tokens.rank || tokens.rank < this.phaseEndRank ) && + ! ( tokens.length === 1 && tokens[0].constructor === String ) ) { + // Re-process incomplete tokens + this.env.dp( 'maybeSyncReturn: recursive transformTokens', + this.frame.title, ret.tokens ); + + // Set up a new child callback with its own callback state + var _cbs = { async: cbs.parent }, + childCB = this.maybeSyncReturn.bind( this, s, _cbs ); + _cbs.self = childCB; + + var res = this.transformTokens( ret.tokens, childCB ); + ret.tokens = res.tokens; + if ( res.async ) { + // Insert new child accumulator chain- any further chunks from + // the transform will be passed as sibling to the last accum + // in this chain, and the new chain will pass its results to + // the former parent accumulator. + + if ( ! ret.async ) { + // There will be no more input to the child pipeline + res.async.siblingDone(); + + // We need to indicate that more results will follow from + // the child pipeline. + ret.async = true; + } else { + // More tokens will follow from original expand. + // Need to return results of recursive expand *before* further + // async results, so we simply pass further results to the + // last accumulator in the new chain. + cbs.parent = res.async.getParentCB( 'sibling' ); + } + } + + } + + asyncCB( ret ); + + if ( ret.async ) { + // Pass reference to maybeSyncReturn to TokenAccumulators to allow + // them to call directly + return cbs.self; + } + } +}; + @@ -517,54 +660,63 @@ SyncTokenTransformManager.prototype.onChunk = function ( tokens ) { var res, localAccum = [], localAccumLength = 0, + workStack = [], // stack of stacks of tokens returned from transforms + // to process before consuming the next input token token, // Top-level frame only in phase 3, as everything is already expanded. ts, transformer, - aborted; + aborted, minRank; - for ( var i = 0, l = tokens.length; i < l; i++ ) { + for ( var i = 0, l = tokens.length; i < l || workStack.length; i++ ) { aborted = false; - token = tokens[i]; + if ( workStack.length ) { + i--; + var curChunk = workStack[workStack.length - 1]; + minRank = curChunk.rank || tokens.rank || this.phaseEndRank - 1; + token = curChunk.pop(); + if ( !curChunk.length ) { + // remove empty chunk + workStack.pop(); + } + } else { + token = tokens[i]; + minRank = tokens.rank || this.phaseEndRank - 1; + } res = { token: token }; - ts = this._getTransforms( token ); - var minRank = token.rank || 0; + ts = this._getTransforms( token, minRank ); + //this.env.dp( 'sync tok:', minRank, token.rank, token, ts ); for (var j = 0, lts = ts.length; j < lts; j++ ) { transformer = ts[j]; - if ( transformer.rank <= minRank ) { - // skip transformation, was already applied. - //console.warn( 'skipping transform'); - continue; - } // Transform the token. 
res = transformer.transform( token, this, this.prevToken ); + //this.env.dp( 'sync res0:', res ); if ( res.token !== token ) { aborted = true; + if ( res.token ) { + res.tokens = [res.token]; + delete res.token; + } break; } - minRank = transformer.rank; token = res.token; } + //this.env.dp( 'sync res:', res ); - if( res.tokens ) { + if( res.tokens && res.tokens.length ) { // Splice in the returned tokens (while replacing the original // token), and process them next. - [].splice.apply( tokens, [i, 1].concat(res.tokens) ); - l = tokens.length; - i--; // continue at first inserted token + var revTokens = res.tokens.slice(); + revTokens.reverse(); + revTokens.rank = transformer.rank; + workStack.push( revTokens ); } else if ( res.token ) { - res.token = this.env.setTokenRank( res.token, this.phaseEndRank ); - if ( res.token.rank === this.phaseEndRank ) { - // token is done. - localAccum.push(res.token); - this.prevToken = res.token; - } else { - // re-process token. - tokens[i] = res.token; - i--; - } + localAccum.push(res.token); + this.prevToken = res.token; } } + localAccum.rank = this.phaseEndRank; + localAccum.cache = tokens.cache; this.env.dp( 'SyncTokenTransformManager.onChunk: emitting ', localAccum ); this.emit( 'chunk', localAccum ); }; @@ -607,17 +759,17 @@ function AttributeTransformManager ( manager, callback ) { //this.pipe = manager.getAttributePipeline( manager.args ); } +// A few constants +AttributeTransformManager.prototype._toType = 'tokens/x-mediawiki/expanded'; + /** * Expand both key and values of all key/value pairs. Used for generic * (non-template) tokens in the AttributeExpander handler, which runs after * templates are already expanded. */ AttributeTransformManager.prototype.process = function ( attributes ) { - // Potentially need to use multiple pipelines to support concurrent async expansion - //this.pipe.process( var pipe, - ref, - idCB = function( format, cb ){ cb( this ); }; + ref; //console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) ); // transform each argument (key and value), and handle asynchronous returns @@ -640,16 +792,11 @@ AttributeTransformManager.prototype.process = function ( attributes ) { this.outstanding++; // transform the key - pipe = this.manager.pipeFactory.getPipeline( this.manager.attributeType, - this.manager.isInclude ); - pipe.setFrame( this.manager.frame, null ); - pipe.on( 'chunk', - this._returnAttributeKey.bind( this, i, true ) - ); - pipe.on( 'end', - this._returnAttributeKey.bind( this, i, false, [] ) - ); - pipe.process( this.manager.env.cloneTokens( cur.k ).concat([ new EOFTk() ]) ); + this.frame.expand( cur.k, + { + type: this._toType, + cb: this._returnAttributeKey.bind( this, i ) + } ); } else { kv.k = cur.k; } @@ -659,19 +806,11 @@ AttributeTransformManager.prototype.process = function ( attributes ) { this.outstanding++; // transform the value - pipe = this.manager.pipeFactory.getPipeline( this.manager.attributeType, - this.manager.isInclude ); - pipe.setFrame( this.manager.frame, null ); - //pipe = this.manager.getAttributePipeline( this.manager.inputType, - // this.manager.args ); - pipe.on( 'chunk', - this._returnAttributeValue.bind( this, i, true ) - ); - pipe.on( 'end', - this._returnAttributeValue.bind( this, i, false, [] ) - ); - //console.warn('starting attribute transform of ' + JSON.stringify( attributes[i].v ) ); - pipe.process( this.manager.env.cloneTokens( cur.v ).concat([ new EOFTk() ]) ); + this.frame.expand( cur.v, + { + type: this._toType, + cb: 
this._returnAttributeValue.bind( this, i ) + } ); } else { kv.v = cur.v; } @@ -683,57 +822,47 @@ AttributeTransformManager.prototype.process = function ( attributes ) { } }; + /** * Expand only keys of key/value pairs. This is generally used for template * parameters to avoid expanding unused values, which is very important for * constructs like switches. */ AttributeTransformManager.prototype.processKeys = function ( attributes ) { - // Potentially need to use multiple pipelines to support concurrent async expansion - //this.pipe.process( var pipe, ref; //console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) ); + + // TODO: wrap in chunk and call + // .get( { type: 'text/x-mediawiki/expanded' } ) on it - // transform each argument (key and value), and handle asynchronous returns + // transform the key for each attribute pair + var kv; for ( var i = 0, l = attributes.length; i < l; i++ ) { var cur = attributes[i]; - var kv = new KV([], cur.v); - if ( kv.v.to !== this.manager.frame.convert ) { - if ( kv.v.to ) { - if ( kv.v.constructor === String ) { - kv.v = new String( kv.v ); - } else { - kv.v = kv.v.slice(); - } - } else if ( kv.v.constructor === String ) { - kv.v = new String( kv.v ); - } - Object.defineProperty( kv.v, 'to', - { - value: this.manager.frame.convert, - enumerable: false - }); + + // fast path for string-only attributes + if ( cur.k.constructor === String && cur.v.constructor === String ) { + kv = new KV( cur.k, this.frame.newParserValue( cur.v ) ); + this.kvs.push( kv ); + continue; } + + // Wrap the value in a ParserValue for lazy expansion + kv = new KV( [], this.frame.newParserValue( cur.v ) ); this.kvs.push( kv ); - if ( cur.k.constructor === Array && cur.k.length && ! cur.k.to ) { + // And expand the key, if needed + if ( cur.k.constructor === Array && cur.k.length && ! 
cur.k.get ) { // Assume that the return is async, will be decremented in callback this.outstanding++; // transform the key - pipe = this.manager.pipeFactory.getPipeline( this.manager.attributeType, - this.manager.isInclude ); - pipe.setFrame( this.manager.frame, null ); - //pipe = this.manager.getAttributePipeline( this.manager.inputType, - // this.manager.args ); - pipe.on( 'chunk', - this._returnAttributeKey.bind( this, i, true ) - ); - pipe.on( 'end', - this._returnAttributeKey.bind( this, i, false, [] ) - ); - pipe.process( this.manager.env.cloneTokens( cur.k ).concat([ new EOFTk() ]) ); + this.frame.expand( cur.k, + { + type: this._toType, + cb: this._returnAttributeKey.bind( this, i ) + } ); } else { kv.k = cur.k; } @@ -755,7 +884,7 @@ AttributeTransformManager.prototype.processValues = function ( attributes ) { ref; //console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) ); - // transform each argument (key and value), and handle asynchronous returns + // transform each value for ( var i = 0, l = attributes.length; i < l; i++ ) { var cur = attributes[i]; var kv = new KV( cur.k, [] ); @@ -772,19 +901,11 @@ AttributeTransformManager.prototype.processValues = function ( attributes ) { this.outstanding++; // transform the value - pipe = this.manager.pipeFactory.getPipeline( this.manager.attributeType, - this.manager.isInclude ); - pipe.setFrame( this.manager.frame, null ); - //pipe = this.manager.getAttributePipeline( this.manager.inputType, - // this.manager.args ); - pipe.on( 'chunk', - this._returnAttributeValue.bind( this, i, true ) - ); - pipe.on( 'end', - this._returnAttributeValue.bind( this, i, false, [] ) - ); - //console.warn('starting attribute transform of ' + JSON.stringify( attributes[i].v ) ); - pipe.process( this.manager.env.cloneTokens( cur.v ).concat([ new EOFTk() ]) ); + this.frame.expand( cur.v, + { + type: this._toType, + cb: this._returnAttributeValue.bind( this, i ) + } ); } else { kv.value = cur.v; } @@ -799,52 +920,26 @@ AttributeTransformManager.prototype.processValues = function ( attributes ) { /** * Callback for async argument value expansions */ -AttributeTransformManager.prototype._returnAttributeValue = function ( ref, notYetDone, tokens ) { - this.manager.env.dp( 'check _returnAttributeValue: ', ref, tokens, - ' notYetDone:', notYetDone ); - this.kvs[ref].v = this.kvs[ref].v.concat( tokens ); - if ( ! notYetDone ) { - var res = this.kvs[ref].v; - this.manager.env.stripEOFTkfromTokens( res ); - this.outstanding--; - // Add the 'to' conversion method to the chunk for easy conversion in - // later processing (parser functions and template argument - // processing). 
- if ( res.to !== this.manager.frame.convert ) { - if ( res.to ) { - if ( res.constructor === String ) { - res = new String( res ); - } else { - res = res.slice(); - } - } else if ( res.constructor === String ) { - res = new String( res ); - } - Object.defineProperty( res, 'to', - { - value: function( format, cb ) { cb( this ); }, - enumerable: false - }); - } - if ( this.outstanding === 0 ) { - this.callback( this.kvs ); - } +AttributeTransformManager.prototype._returnAttributeValue = function ( ref, tokens ) { + this.manager.env.dp( 'check _returnAttributeValue: ', ref, tokens ); + this.kvs[ref].v = tokens; + this.kvs[ref].v = this.manager.env.stripEOFTkfromTokens( this.kvs[ref].v ); + this.outstanding--; + if ( this.outstanding === 0 ) { + this.callback( this.kvs ); } }; /** * Callback for async argument key expansions */ -AttributeTransformManager.prototype._returnAttributeKey = function ( ref, notYetDone, tokens ) { - //console.warn( 'check _returnAttributeKey: ' + JSON.stringify( tokens ) + - // ' notYetDone:' + notYetDone ); - this.kvs[ref].k = this.kvs[ref].k.concat( tokens ); - if ( ! notYetDone ) { - this.manager.env.stripEOFTkfromTokens( this.kvs[ref].k ); - this.outstanding--; - if ( this.outstanding === 0 ) { - this.callback( this.kvs ); - } +AttributeTransformManager.prototype._returnAttributeKey = function ( ref, tokens ) { + //console.warn( 'check _returnAttributeKey: ' + JSON.stringify( tokens ) ); + this.kvs[ref].k = tokens; + this.kvs[ref].k = this.manager.env.stripEOFTkfromTokens( this.kvs[ref].k ); + this.outstanding--; + if ( this.outstanding === 0 ) { + this.callback( this.kvs ); } }; @@ -882,32 +977,35 @@ TokenAccumulator.prototype.getParentCB = function ( reference ) { return this._returnTokens.bind( this, reference ); }; +TokenAccumulator.prototype.setParentCB = function ( cb ) { + this.parentCB = cb; +}; + /** * Pass tokens to an accumulator * * @method * @param {String}: reference, 'child' or 'sibling'. - * @param {Object}: { tokens, async, allTokensProcessed } + * @param {Object}: { tokens, async } * @returns {Mixed}: new parent callback for caller or falsy value */ -TokenAccumulator.prototype._returnTokens = - function ( reference, ret ) { +TokenAccumulator.prototype._returnTokens = function ( reference, ret ) { var cb, returnTokens = []; - if ( ! ret.async ) { this.outstanding--; } - this.manager.env.dp( 'TokenAccumulator._returnTokens', ret ); + this.manager.env.dp( 'TokenAccumulator._returnTokens', reference, ret ); // FIXME if ( ret.tokens === undefined ) { if ( this.manager.env.debug ) { + console.warn( 'ret.tokens undefined: ' + JSON.stringify( ret ) ); console.trace(); } - if ( ret.token ) { + if ( ret.token !== undefined ) { ret.tokens = [ret.token]; } else { ret.tokens = []; @@ -916,12 +1014,6 @@ TokenAccumulator.prototype._returnTokens = if ( reference === 'child' ) { var res = {}; - if( !ret.allTokensProcessed ) { - // There might be transformations missing on the returned tokens, - // re-transform to make sure those are applied too. 
- res = this.manager.transformTokens( ret.tokens, this.parentCB ); - ret.tokens = res.tokens; - } if ( !ret.async ) { // empty accum too @@ -930,43 +1022,39 @@ TokenAccumulator.prototype._returnTokens = } //this.manager.env.dp( 'TokenAccumulator._returnTokens child: ', // tokens, ' outstanding: ', this.outstanding ); - ret.allTokensProcessed = true; + ret.tokens.rank = this.manager.phaseEndRank; ret.async = this.outstanding; - this.parentCB( ret ); + this._callParentCB( ret ); - if ( res.async ) { - this.parentCB = res.async.getParentCB( 'sibling' ); - } return null; } else { // sibling if ( this.outstanding === 0 ) { ret.tokens = this.accum.concat( ret.tokens ); - // A sibling will transform tokens, so we don't have to do this - // again. + // A sibling has already transformed child tokens, so we don't + // have to do this again. //this.manager.env.dp( 'TokenAccumulator._returnTokens: ', // 'sibling done and parentCB ', // tokens ); - ret.allTokensProcessed = true; + ret.tokens.rank = this.manager.phaseEndRank; ret.async = false; this.parentCB( ret ); return null; } else if ( this.outstanding === 1 && ret.async ) { - //this.manager.env.dp( 'TokenAccumulator._returnTokens: ', - // 'sibling done and parentCB but async ', - // tokens ); // Sibling is not yet done, but child is. Return own parentCB to // allow the sibling to go direct, and call back parent with // tokens. The internal accumulator is empty at this stage, as its // tokens are passed to the parent when the child is done. - ret.allTokensProcessed = true; - return this.parentCB( ret ); + ret.tokens.rank = this.manager.phaseEndRank; + return this._callParentCB( ret ); } else { + // sibling tokens are always fully processed for this phase, so we + // can directly concatenate them here. this.accum = this.accum.concat( ret.tokens ); - //this.manager.env.dp( 'TokenAccumulator._returnTokens: sibling done, but not overall. async=', - // res.async, ', this.outstanding=', this.outstanding, - // ', this.accum=', this.accum, ' manager.title=', this.manager.title ); + this.manager.env.dp( 'TokenAccumulator._returnTokens: sibling done, but not overall. async=', + ret.async, ', this.outstanding=', this.outstanding, + ', this.accum=', this.accum, ' frame.title=', this.manager.frame.title ); } @@ -978,10 +1066,18 @@ TokenAccumulator.prototype._returnTokens = */ TokenAccumulator.prototype.siblingDone = function () { //console.warn( 'TokenAccumulator.siblingDone: ' ); - this._returnTokens ( 'sibling', { tokens: [], async: false, allTokensProcessed: true} ); + this._returnTokens ( 'sibling', { tokens: [], async: false } ); }; +TokenAccumulator.prototype._callParentCB = function ( ret ) { + var cb = this.parentCB( ret ); + if ( cb ) { + this.parentCB = cb; + } + return this.parentCB; +}; + /** * Push a token into the accumulator * @@ -1009,7 +1105,7 @@ TokenAccumulator.prototype.append = function ( token ) { * The Frame object * * A frame represents a template expansion scope including parameters passed - * to the template (args). It provides a generic 'convert' method which + * to the template (args). It provides a generic 'expand' method which * expands / converts individual parameter values in its scope. It also * provides methods to check if another expansion would lead to loops or * exceed the maximum expansion depth. 
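A usage sketch of the expand() API described above, assuming frame is the Frame instance available to a token transform; the chunk contents, the template title and the maxDepth value are all illustrative:

// Expand a token chunk to fully processed tokens, as the
// AttributeTransformManager above does for attribute values.
var chunk = [ new TagTk( 'b' ), 'bold text', new EndTagTk( 'b' ) ];
frame.expand( chunk, {
	type: 'tokens/x-mediawiki/expanded',
	cb: function ( expandedTokens ) {
		// fully processed tokens; the result is cached on the chunk
	}
} );

// Or request pre-processed wikitext rendered back to a plain string.
frame.expand( chunk, {
	type: 'text/x-mediawiki/expanded',
	cb: function ( text ) {
		// string result
	}
} );

// Guard against template loops / excessive depth before expanding a
// transclusion; the visible fall-through below returns false when no
// problem is found.
if ( !frame.loopAndDepthCheck( 'Template:Example', 40 ) ) {
	// safe to expand
}

Results are cached per source chunk in an ExpansionCache keyed on the frame's _cacheKey plus the requested type, so repeated expansions of the same value in the same frame are served from the cache.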
@@ -1019,17 +1115,23 @@ function Frame ( title, manager, args, parentFrame ) { this.title = title; this.manager = manager; this.args = new Params( this.manager.env, args ); + // cache key fragment for expansion cache + // FIXME: better use fully expand args for the cache key! Can avoid using + // the parent cache keys in that case. + var MD5 = new jshashes.MD5(); + if ( args._cacheKey === undefined ) { + args._cacheKey = MD5.hex( JSON.stringify( args ) ); + } + if ( parentFrame ) { this.parentFrame = parentFrame; this.depth = parentFrame.depth + 1; + this._cacheKey = MD5.hex( parentFrame._cacheKey + args._cacheKey ); } else { this.parentFrame = null; this.depth = 0; + this._cacheKey = args._cacheKey; } - var self = this; - this.convert = function ( format, cb, parentCB ) { - self._convertThunk( this, format, cb, parentCB ); - }; } /** @@ -1041,83 +1143,116 @@ Frame.prototype.newChild = function ( title, manager, args ) { /** * Expand / convert a thunk (a chunk of tokens not yet fully expanded). + * + * XXX: support different input formats, expansion phases / flags and more + * output formats. */ -Frame.prototype._convertThunk = function ( chunk, format, cb, parentCB ) { - this.manager.env.dp( 'convertChunk', chunk ); +Frame.prototype.expand = function ( chunk, options ) { + var outType = options.type || 'text/x-mediawiki/expanded'; + var cb = options.cb || console.warn( JSON.stringify( options ) ); + this.manager.env.dp( 'Frame.expand', this._cacheKey, chunk ); if ( chunk.constructor === String ) { // Plain text remains text. Nothing to do. - return cb( chunk ); - } - - // Set up a conversion cache on the chunk, if it does not yet exist - if ( chunk.toCache === undefined ) { - Object.defineProperty( chunk, 'toCache', { value: {}, enumerable: false } ); - } else { - if ( chunk.toCache[format] !== undefined ) { - cb( chunk.toCache[format] ); - return; + if ( outType !== 'text/x-mediawiki/expanded' ) { + return cb( [ chunk ] ); + } else { + return cb( chunk ); } + } else if ( chunk.constructor === ParserValue ) { + // Delegate to ParserValue + return chunk.get( options ); } - if ( format === 'text/plain/expanded' ) { + + // We are now dealing with an Array of tokens. See if the chunk is + // a source chunk with a cache attached. + var maybeCached; + if ( ! chunk.length ) { + // nothing to do, simulate a cache hit.. + maybeCached = chunk; + } else if ( chunk.cache === undefined ) { + // add a cache to the chunk + Object.defineProperty( chunk, 'cache', + // XXX: play with cache size! + { value: new ExpansionCache( 5 ), enumerable: false } ); + } else { + // try to retrieve cached expansion + maybeCached = chunk.cache.get( this, options ); + // XXX: disable caching of error messages! + } + if ( maybeCached ) { + this.manager.env.dp( 'got cache', this.title, this._cacheKey, maybeCached ); + return cb( maybeCached ); + } + + // not cached, actually have to do some work. + if ( outType === 'text/x-mediawiki/expanded' ) { // Simply wrap normal expansion ;) // XXX: Integrate this into the pipeline setup? - format = 'tokens/x-mediawiki/expanded'; + outType = 'tokens/x-mediawiki/expanded'; var self = this, origCB = cb; cb = function( resChunk ) { var res = self.manager.env.tokensToString( resChunk ); // cache the result - chunk.toCache['text/plain/expanded'] = res; + chunk.cache.set( self, options, res ); origCB( res ); }; } // XXX: Should perhaps create a generic from..to conversion map in // mediawiki.parser.js, at least for forward conversions. 
- if ( format === 'tokens/x-mediawiki/expanded' ) { - if ( parentCB ) { + if ( outType === 'tokens/x-mediawiki/expanded' ) { + if ( options.asyncCB ) { // Signal (potentially) asynchronous expansion to parent. - // If - parentCB( { async: true } ); + options.asyncCB( ); } var pipeline = this.manager.pipeFactory.getPipeline( + // XXX: use input type this.manager.attributeType || 'tokens/x-mediawiki', true ); pipeline.setFrame( this, null ); - // In the interest of interface simplicity, we accumulate all emitted + // In the name of interface simplicity, we accumulate all emitted // chunks in a single accumulator. - var accum = []; - // Callback used to cache the result of the conversion - var cacheIt = function ( res ) { chunk.toCache[format] = res; }; + var eventState = { cache: chunk.cache, options: options, accum: [], cb: cb }; pipeline.addListener( 'chunk', - this.onThunkEvent.bind( this, cacheIt, accum, true, cb ) ); + this.onThunkEvent.bind( this, eventState, true ) ); pipeline.addListener( 'end', - this.onThunkEvent.bind( this, cacheIt, accum, false, cb ) ); - pipeline.process( chunk.concat( [new EOFTk()] ), this.title ); + this.onThunkEvent.bind( this, eventState, false ) ); + if ( chunk[chunk.length - 1].constructor === EOFTk ) { + pipeline.process( chunk, this.title ); + } else { + var newChunk = chunk.concat( this._eofTkList ); + newChunk.rank = chunk.rank; + pipeline.process( newChunk, this.title ); + } } else { - throw "Frame._convertThunk: Unsupported format " + format; + throw "Frame.expand: Unsupported output type " + outType; } }; +// constant chunk terminator +Frame.prototype._eofTkList = [ new EOFTk() ]; + /** * Event handler for chunk conversion pipelines */ -Frame.prototype.onThunkEvent = function ( cacheIt, accum, notYetDone, cb, ret ) { +Frame.prototype.onThunkEvent = function ( state, notYetDone, ret ) { if ( notYetDone ) { - //this.manager.env.dp( 'Frame.onThunkEvent accum:', accum ); - accum.push.apply( accum, ret ); + state.accum = state.accum.concat(this.manager.env.stripEOFTkfromTokens( ret ) ); + this.manager.env.dp( 'Frame.onThunkEvent accum:', this._cacheKey, state.accum ); } else { - this.manager.env.stripEOFTkfromTokens( accum ); - this.manager.env.dp( 'Frame.onThunkEvent:', accum ); - cacheIt( accum ); - cb ( accum ); + this.manager.env.dp( 'Frame.onThunkEvent:', this._cacheKey, state.accum ); + state.cache.set( this, state.options, state.accum ); + // Add cache to accum too + Object.defineProperty( state.accum, 'cache', + { value: state.cache, enumerable: false } ); + state.cb ( state.accum ); } }; - /** * Check if expanding would lead to a loop, or would exceed the * maximum expansion depth. @@ -1146,6 +1281,60 @@ Frame.prototype.loopAndDepthCheck = function ( title, maxDepth ) { return false; }; +/** + * ParserValue factory + * + * ParserValues wrap a piece of content that can be retrieved in different + * expansion stages and different content types using the get() method. + * Content types currently include 'tokens/x-mediawiki/expanded' for + * pre-processed tokens and 'text/x-mediawiki/expanded' for pre-processed + * wikitext. 
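A short sketch of the lazy-value pattern this factory enables (assumes frame is a Frame instance; the wrapped content is illustrative):

// Wrap content without expanding it yet.
var lazy = frame.newParserValue( [ new TagTk( 'i' ), 'maybe unused', new EndTagTk( 'i' ) ] );

// Expansion only happens once a consumer asks for a concrete type.
lazy.get( {
	type: 'tokens/x-mediawiki/expanded',
	cb: function ( tokens ) {
		// fully processed tokens for this frame; repeat calls hit the cache
	}
} );

// Plain strings take a fast path: get() simply hands back the wrapped string.
var plain = frame.newParserValue( 'just text' );
plain.get( { cb: function ( v ) { console.log( String( v ) ); } } );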
+ */ +Frame.prototype.newParserValue = function ( source, options ) { + // TODO: support more options: + // options.type to specify source type + // options.phase to specify source expansion stage + if ( source.constructor === String ) { + source = new String( source ); + source.get = this._getID; + return source; + } else if ( options && options.frame ) { + return new ParserValue( source, options.frame ); + } else { + return new ParserValue( source, this ); + } +}; + +Frame.prototype._getID = function( options ) { + options.cb( this ); +}; + +/** + * A specialized expansion cache, normally associated with a chunk of tokens. + */ +function ExpansionCache ( n ) { + this._cache = new LRU( n ); +} + +ExpansionCache.prototype.makeKey = function ( frame, options ) { + //console.warn( frame._cacheKey ); + return frame._cacheKey + options.type ; +}; + +ExpansionCache.prototype.set = function ( frame, options, value ) { + //if ( frame.title !== null ) { + //console.log( 'setting cache for ' + frame.title + + // ' ' + this.makeKey( frame, options ) + + // ' to: ' + JSON.stringify( value ) ); + return this._cache.set( this.makeKey( frame, options ), value ); + //} +}; + +ExpansionCache.prototype.get = function ( frame, options ) { + return this._cache.get( this.makeKey( frame, options ) ); +}; + + if (typeof module == "object") { module.exports.AsyncTokenTransformManager = AsyncTokenTransformManager; module.exports.SyncTokenTransformManager = SyncTokenTransformManager; diff --git a/modules/parser/mediawiki.WikitextSerializer.js b/modules/parser/mediawiki.WikitextSerializer.js new file mode 100644 index 0000000000..73e9645d89 --- /dev/null +++ b/modules/parser/mediawiki.WikitextSerializer.js @@ -0,0 +1,361 @@ +/** + * Serializes a chunk of tokens or an HTML DOM to MediaWiki's wikitext flavor. + * + * @class + * @constructor + * @param options {Object} List of options for serialization + */ +WikitextSerializer = function( options ) { + this.options = $.extend( { + // defaults + }, options || {} ); +}; + +var WSP = WikitextSerializer.prototype; + +WSP.defaultOptions = { + needParagraphLines: false, + listStack: [], + lastHandler: null +}; + +var id = function( v ) { return function() { return v; }; }; + +WSP._listHandler = function( bullet, state, token ) { + var bullets, res; + var stack = state.listStack; + if (stack.length === 0) { + bullets = "\n" + bullet; + res = bullets; + } else { + var curList = stack[stack.length - 1]; + bullets = curList.bullets + bullet; + curList.itemCount++; + if ( // deeply nested list + curList.itemCount > 2 || + // A nested list, not directly after the li + ( curList.itemCount > 1 && + ! ( state.prevToken.constructor === TagTk && + state.prevToken.name === 'li') )) { + res = bullets; + } else { + res = bullet; + } + } + stack.push({ itemCount: 0, bullets: bullets}); + return res; +}; + +WSP._listEndHandler = function( state, token ) { + state.listStack.pop(); + // FIXME: insert a newline after a list block is closed (the next token is + // no list token). + return ''; +}; + +WSP._listItemHandler = function ( state, token ) { + //console.warn( JSON.stringify( state.listStack ) ); + var stack = state.listStack; + state.needParagraphLines = true; + if (stack.length === 0) { + return ''; + } else { + var curList = stack[stack.length - 1]; + curList.itemCount++; + // > 1 ==> consecutive list items + return ( curList.itemCount > 1 ) ? 
curList.bullets : ''; + } +}; + +WSP._serializeTableTag = function ( symbol, optionEndSymbol, state, token ) { + if ( token.attribs.length ) { + return symbol + ' ' + + WSP._serializeAttributes( token.attribs ) + optionEndSymbol; + } else { + return symbol; + } +}; + +WSP._linkHandler = function( state, token ) { + return '[['; + // TODO: handle internal/external links etc using RDFa and dataAttribs + // Also convert unannotated html links to external wiki links for html + // import. Might want to consider converting relative links without path + // component and file extension to wiki links. + //if ( rtinfo.type === 'wikilink' ) { + // return '[[' + rtinfo.target + ']]'; + //} else { + // // external link + // return '[' + rtinfo. +}; +WSP._linkEndHandler = function( state, token ) { + return ']]'; +}; + +WSP.tagToWikitext = { + body: {}, + b: { start: id("'''"), end: id("'''") }, + i: { start: id("''"), end: id("''") }, + ul: { + start: WSP._listHandler.bind( null, '*' ), + end: WSP._listEndHandler + }, + ol: { + start: WSP._listHandler.bind( null, '#' ), + end: WSP._listEndHandler + }, + dl: { + start: WSP._listHandler.bind( null, '' ), + end: WSP._listEndHandler + }, + li: { start: WSP._listItemHandler }, + // XXX: handle single-line vs. multi-line dls etc + dt: { start: id(";") }, + dd: { start: id(":") }, + // XXX: handle options + table: { + start: WSP._serializeTableTag.bind(null, "\n{|", ''), + end: id("\n|}") + }, + tbody: {}, + th: { + start: function ( state, token ) { + if ( token.dataAttribs.t_stx === 'row' ) { + return WSP._serializeTableTag("!!", ' |', state, token); + } else { + return WSP._serializeTableTag("\n!", ' |', state, token); + } + } + }, + // XXX: omit for first row in table. + tr: { + start: function ( state, token ) { + if ( state.prevToken.constructor === TagTk && state.prevToken.name === 'tbody' ) { + return ''; + } else { + return WSP._serializeTableTag("\n|-", ' |', state, token ); + } + } + }, + td: { + start: function ( state, token ) { + if ( token.dataAttribs.t_stx === 'row' ) { + return WSP._serializeTableTag("||", ' |', state, token); + } else { + return WSP._serializeTableTag("\n|", ' |', state, token); + } + } + }, + caption: { start: WSP._serializeTableTag.bind(null, "\n|+", ' |') }, + p: { + start: function( state, token ) { + if (state.needParagraphLines) { + return "\n\n"; + } else { + state.needParagraphLines = true; + return ''; + } + } + }, + hr: { start: id("\n----"), end: id("\n") }, + h1: { start: id("\n="), end: id("=\n") }, + h2: { start: id("\n=="), end: id("==\n") }, + h3: { start: id("\n==="), end: id("===\n") }, + h4: { start: id("\n===="), end: id("====\n") }, + h5: { start: id("\n====="), end: id("=====\n") }, + h6: { start: id("\n======"), end: id("======\n") }, + pre: { start: id("<pre>"), end: id("</pre>") }, + a: { start: WSP._linkHandler, end: WSP._linkEndHandler }, + nowiki: { start: id("<nowiki>"), end: id("</nowiki>") } +}; + + +WSP._serializeAttributes = function ( attribs ) { + var out = []; + for ( var i = 0, l = attribs.length; i < l; i++ ) { + var kv = attribs[i]; + if (kv.k.length) { + if ( kv.v.length ) { + out.push( kv.k + '=' + + '"' + kv.v.replace( '"', '"' ) + '"'); + } else { + out.push( kv.k ); + } + } else if ( kv.v.length ) { + // not very likely.. 
+ out.push( kv.v ); + } + } + // XXX: round-trip optional whitespace / line breaks etc + return out.join(' '); +}; + +WSP._eatFirstNewLine = function ( state, chunk ) { + while ( chunk[0] === '\n' ) { + chunk = chunk.substr(1); + } + state.realChunkCB( chunk ); + state.chunkCB = state.realChunkCB; +}; + + +/** + * Serialize a chunk of tokens + */ +WSP.serializeTokens = function( tokens, chunkCB ) { + var state = $.extend({}, this.defaultOptions, this.options), + i, l; + state.chunkCB = WSP._eatFirstNewLine.bind( this, state ); + if ( chunkCB === undefined ) { + var out = []; + state.realChunkCB = out.push.bind(out); + for ( i = 0, l = tokens.length; i < l; i++ ) { + this._serializeToken( state, tokens[i] ); + } + return out; + } else { + state.realChunkCB = chunkCB; + for ( i = 0, l = tokens.length; i < l; i++ ) { + this._serializeToken( state, tokens[i] ); + } + } +}; + + +/** + * Serialize a token. + */ +WSP._serializeToken = function ( state, token ) { + state.prevToken = state.curToken; + state.curToken = token; + var handler; + switch( token.constructor ) { + case TagTk: + case SelfclosingTagTk: + handler = this.tagToWikitext[token.name]; + if ( handler && handler.start ) { + state.chunkCB( handler.start( state, token ) ); + } + break; + case EndTagTk: + handler = this.tagToWikitext[token.name]; + if ( handler && handler.end ) { + state.chunkCB( handler.end( state, token ) ); + } + break; + case String: + state.chunkCB( token ); + break; + case CommentTk: + state.chunkCB( '<!--' + token.value + '-->' ); + break; + case NlTk: + state.chunkCB( '\n' ); + break; + case EOFTk: + break; + default: + console.warn( 'Unhandled token type ' + JSON.stringify( token ) ); + break; + } +}; + +/** + * Serialize an HTML DOM document. + */ +WSP.serializeDOM = function( node, chunkCB ) { + var state = $.extend({}, this.defaultOptions, this.options); + state.chunkCB = this._eatFirstNewLine.bind( this, state ); + if ( ! chunkCB ) { + var out = []; + state.realChunkCB = out.push.bind( out ); + this._serializeDOM( node, state ); + return out.join(''); + } else { + state.realChunkCB = chunkCB; + this._serializeDOM( node, state ); + } +}; + +/** + * Internal worker. Recursively serialize a DOM subtree by creating tokens and + * calling _serializeToken on each of these. 
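Usage sketch, mirroring the call this patch adds to parse.js further down; the document variable is assumed to come from the existing HTML5/jsdom setup and is not created here:

var WikitextSerializer = require( './mediawiki.WikitextSerializer.js' ).WikitextSerializer;

var serializer = new WikitextSerializer();

// Stream wikitext chunks to a callback as they are produced...
serializer.serializeDOM( document.body, process.stdout.write.bind( process.stdout ) );

// ...or omit the callback to get the joined wikitext string back.
var wikitext = serializer.serializeDOM( document.body );

// serializeTokens( tokens, chunkCB ) works the same way on a token stream,
// returning an array of chunks when no callback is given.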
+ */ +WSP._serializeDOM = function( node, state ) { + // serialize this node + switch( node.nodeType ) { + case Node.ELEMENT_NODE: + //console.warn( node.nodeName.toLowerCase() ); + var children = node.childNodes, + name = node.nodeName.toLowerCase(), + handler = this.tagToWikitext[name]; + if ( handler ) { + var tkAttribs = this._getDOMAttribs(node.attributes), + tkRTInfo = this._getDOMRTInfo(node.attributes); + + this._serializeToken( state, + new TagTk( name, tkAttribs, tkRTInfo ) ); + for ( var i = 0, l = children.length; i < l; i++ ) { + // serialize all children + this._serializeDOM( children[i], state ); + } + this._serializeToken( state, + new EndTagTk( name, tkAttribs, tkRTInfo ) ); + } else { + console.warn( 'Unhandled element: ' + node.outerHTML ); + } + break; + case Node.TEXT_NODE: + this._serializeToken( state, node.data ); + break; + case Node.COMMENT_NODE: + this._serializeToken( state, new CommentTk( node.data ) ); + break; + default: + console.warn( "Unhandled node type: " + + node.outerHTML ); + break; + } +}; + +WSP._getDOMAttribs = function( attribs ) { + // convert to list fo key-value pairs + var out = []; + for ( var i = 0, l = attribs.length; i < l; i++ ) { + var attrib = attribs.item(i); + if ( attrib.name !== 'data-mw' ) { + out.push( { k: attrib.name, v: attrib.value } ); + } + } + return out; +}; + +WSP._getDOMRTInfo = function( attribs ) { + if ( attribs['data-mw'] ) { + return JSON.parse( attribs['data-mw'].value || '{}' ); + } else { + return {}; + } +}; + +// Quick HACK: define Node constants locally +// https://developer.mozilla.org/en/nodeType +var Node = { + ELEMENT_NODE: 1, + ATTRIBUTE_NODE: 2, + TEXT_NODE: 3, + CDATA_SECTION_NODE: 4, + ENTITY_REFERENCE_NODE: 5, + ENTITY_NODE: 6, + PROCESSING_INSTRUCTION_NODE: 7, + COMMENT_NODE: 8, + DOCUMENT_NODE: 9, + DOCUMENT_TYPE_NODE: 10, + DOCUMENT_FRAGMENT_NODE: 11, + NOTATION_NODE: 12 +}; + + +if (typeof module == "object") { + module.exports.WikitextSerializer = WikitextSerializer; +} diff --git a/modules/parser/mediawiki.parser.defines.js b/modules/parser/mediawiki.parser.defines.js index 22eb21243b..e74f409194 100644 --- a/modules/parser/mediawiki.parser.defines.js +++ b/modules/parser/mediawiki.parser.defines.js @@ -3,6 +3,8 @@ * strings or String objects (if attributes are needed). */ +var async = require('async'); + var toString = function() { return JSON.stringify( this ); }; function TagTk( name, attribs, dataAttribs ) { @@ -134,29 +136,115 @@ Params.prototype.named = function () { return out; }; +/** + * Expand a slice of the parameters using the supplied get options. 
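A sketch of how a caller might use it, assuming params is a Params instance (for example a template frame's frame.args) whose values are strings or ParserValues; the indices are illustrative:

params.getSlice(
	{
		type: 'tokens/x-mediawiki/expanded',
		cb: function ( kvs ) {
			// kvs is a list of KV pairs whose values are now expanded
			kvs.forEach( function ( kv ) {
				console.log( kv.k, JSON.stringify( kv.v ) );
			} );
		}
	},
	1, 3 // expand positional parameters 1 and 2 only
);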
+ */ +Params.prototype.getSlice = function ( options, start, end ) { + var args = this.slice( start, end ), + cb = options.cb; + //console.warn( JSON.stringify( args ) ); + async.map( + args, + function( kv, cb2 ) { + if ( kv.v.constructor === String ) { + // nothing to do + cb2( null, kv ); + } else if ( kv.v.constructor === Array && + // remove String from Array + kv.v.length === 1 && kv.v[0].constructor === String ) { + cb2( null, new KV( kv.k, kv.v[0] ) ); + } else { + // Expand the value + var o2 = $.extend( {}, options ); + // add in the key + o2.cb = function ( v ) { + cb2( null, new KV( kv.k, v ) ); + }; + kv.v.get( o2 ); + } + }, + function( err, res ) { + if ( err ) { + console.trace(); + throw JSON.stringify( err ); + } + //console.warn( 'getSlice res: ' + JSON.stringify( res ) ); + cb( res ); + }); +}; -function ParamValue ( chunk, manager ) { - this.chunk = chunk; - this.manager = manager; - this.cache = {}; + + +/** + * A chunk. Wraps a source chunk of tokens with a reference to a frame for + * lazy and shared transformations. Do not use directly- use + * frame.newParserValue instead! + */ +function ParserValue ( source, frame ) { + if ( source.constructor === ParserValue ) { + Object.defineProperty( this, 'source', + { value: source.source, enumerable: false } ); + } else { + Object.defineProperty( this, 'source', + { value: source, enumerable: false } ); + } + Object.defineProperty( this, 'frame', + { value: frame, enumerable: false } ); } -ParamValue.prototype.expanded = function ( format, cb ) { - if ( format === tokens ) { - if ( this.cache.tokens ) { - cb( this.cache.tokens ); + +ParserValue.prototype._defaultTransformOptions = { + type: 'text/x-mediawiki/expanded' +}; + +ParserValue.prototype.toJSON = function() { + return this.source; +}; + +ParserValue.prototype.get = function( options, cb ) { + //console.trace(); + if ( ! options ) { + options = $.extend({}, this._defaultTransformOptions); + } else if ( options.type === undefined ) { + options.type = this._defaultTransformOptions.type; + } + + // convenience cb override for async-style functions that pass a cb as the + // last argument + if ( cb === undefined ) { + cb = options.cb; + } + + // try the cache + var maybeCached = this.source.cache && this.source.cache.get( this.frame, options ); + if ( maybeCached !== undefined ) { + if ( cb ) { + cb ( maybeCached ); } else { - var pipeline = this.manager.pipeFactory.getPipeline( - this.manager.attributeType || 'tokens/wiki', true - ); - pipeline.setFrame( this.manager.frame, null ); + return maybeCached; } } else { - throw "ParamValue.expanded: Unsupported format " + format; + if ( ! options.cb ) { + console.trace(); + throw "Chunk.get: Need to expand asynchronously, but no cb provided! " + + JSON.stringify( this, null, 2 ); + } + options.cb = cb; + this.frame.expand( this.source, options ); } }; - +ParserValue.prototype.length = function () { + return this.source.length; +}; + + +//Chunk.prototype.slice = function () { +// return this.source.slice.apply( this.source, arguments ); +//}; + + +// TODO: don't use globals! 
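For orientation, the token vocabulary these globals expose, sketched as a tiny hand-built stream (tag names, attributes and dataAttribs values are illustrative):

var tokens = [
	new TagTk( 'p', [], {} ),
	'Hello ',
	new TagTk( 'b', [ new KV( 'class', 'demo' ) ], { stx: 'html' } ),
	'world',
	new EndTagTk( 'b' ),
	new EndTagTk( 'p' ),
	new EOFTk()
];
// Chunks like this are what the transform managers pass around (rank-tagged
// per phase) and what WikitextSerializer.serializeTokens consumes.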
if (typeof module == "object") { module.exports = {}; global.TagTk = TagTk; @@ -167,4 +255,5 @@ if (typeof module == "object") { global.EOFTk = EOFTk; global.KV = KV; global.Params = Params; + global.ParserValue = ParserValue; } diff --git a/modules/parser/mediawiki.parser.environment.js b/modules/parser/mediawiki.parser.environment.js index 66fe97f63c..a99913209d 100644 --- a/modules/parser/mediawiki.parser.environment.js +++ b/modules/parser/mediawiki.parser.environment.js @@ -33,7 +33,7 @@ MWParserEnvironment.prototype.lookupKV = function ( kvs, key ) { var kv; for ( var i = 0, l = kvs.length; i < l; i++ ) { kv = kvs[i]; - if ( kv.k === key ) { + if ( kv.k.trim() === key ) { // found, return it. return kv; } @@ -115,7 +115,7 @@ MWParserEnvironment.prototype.KVtoHash = function ( kvs ) { return res; }; -MWParserEnvironment.prototype.setTokenRank = function ( token, rank ) { +MWParserEnvironment.prototype.setTokenRank = function ( rank, token ) { // convert string literal to string object if ( token.constructor === String && token.rank === undefined ) { token = new String( token ); @@ -126,19 +126,25 @@ MWParserEnvironment.prototype.setTokenRank = function ( token, rank ) { // Strip 'end' tokens and trailing newlines MWParserEnvironment.prototype.stripEOFTkfromTokens = function ( tokens ) { - this.dp( 'stripping end or whitespace tokens', tokens ); + this.dp( 'stripping end or whitespace tokens' ); if ( ! tokens.length ) { return tokens; } // Strip 'end' tokens and trailing newlines var l = tokens[tokens.length - 1]; - while ( tokens.length && - ( l.constructor === EOFTk || l.constructor === NlTk ) - ) - { - this.dp( 'stripping end or whitespace tokens' ); - tokens.pop(); - l = tokens[tokens.length - 1]; + if ( l.constructor === EOFTk || l.constructor === NlTk || + ( l.constructor === String && l.match( /^\s+$/ ) ) ) { + var origTokens = tokens; + tokens = origTokens.slice(); + tokens.rank = origTokens.rank; + while ( tokens.length && + (( l.constructor === EOFTk || l.constructor === NlTk ) || + ( l.constructor === String && l.match( /^\s+$/ ) ) ) ) + { + this.dp( 'stripping end or whitespace tokens' ); + tokens.pop(); + l = tokens[tokens.length - 1]; + } } return tokens; }; @@ -269,8 +275,7 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens, strict ) { if ( token === undefined ) { if ( this.debug ) { console.trace(); } this.tp( 'MWParserEnvironment.tokensToString, invalid token: ' + - JSON.stringify( token ) + - ' tokens:' + JSON.stringify( tokens, null, 2 )); + token, ' tokens:', tokens); continue; } if ( token.constructor === String ) { @@ -282,13 +287,13 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens, strict ) { return [out.join(''), tokens.slice( i )]; } var tstring = JSON.stringify( token ); - this.dp ( 'MWParserEnvironment.tokensToString, non-text token: ' + - tstring + JSON.stringify( tokens, null, 2 ) ); + this.dp ( 'MWParserEnvironment.tokensToString, non-text token: ', + tstring, tokens); if ( this.debug ) { console.trace(); } //out.push( tstring ); } } - //console.warn( 'MWParserEnvironment.tokensToString result: ' + out.join('') ); + this.dp( 'MWParserEnvironment.tokensToString result: ', out ); return out.join(''); }; @@ -359,6 +364,7 @@ MWParserEnvironment.prototype.dp = function ( ) { try { console.warn( JSON.stringify( arguments, null, 2 ) ); } catch ( e ) { + console.trace(); console.warn( e ); } } else { diff --git a/modules/parser/mediawiki.parser.js b/modules/parser/mediawiki.parser.js index 2104851938..6c41c3f51f 100644 --- 
a/modules/parser/mediawiki.parser.js +++ b/modules/parser/mediawiki.parser.js @@ -34,6 +34,7 @@ var fs = require('fs'), Sanitizer = require('./ext.core.Sanitizer.js').Sanitizer, TemplateHandler = require('./ext.core.TemplateHandler.js').TemplateHandler, AttributeExpander = require('./ext.core.AttributeExpander.js').AttributeExpander, + ListHandler = require('./ext.core.ListHandler.js').ListHandler, LinkHandler = require('./ext.core.LinkHandler.js'), WikiLinkHandler = LinkHandler.WikiLinkHandler, ExternalLinkHandler = LinkHandler.ExternalLinkHandler, @@ -86,8 +87,7 @@ ParserPipelineFactory.prototype.recipes = { [ OnlyInclude, IncludeOnly, - NoInclude, - BehaviorSwitchHandler + NoInclude // Insert TokenCollectors for extensions here (don't expand // templates in extension contents); wrap collected tokens in // special extension token. @@ -108,12 +108,19 @@ ParserPipelineFactory.prototype.recipes = { [ 2, 'tokens/x-mediawiki' ], [ TemplateHandler, + /* ExtensionHandler1, */ // using SFH_OBJECT_ARGS in PHP + // Expand attributes after templates to avoid expanding unused branches + // No expansion of quotes, paragraphs etc in attributes, as in + // PHP parser- up to text/x-mediawiki/expanded only. AttributeExpander, - WikiLinkHandler, + + // now all attributes expanded to tokens or string + + WikiLinkHandler, // more convenient after attribute expansion ExternalLinkHandler - /* ExtensionHandler1, */ - /* ExtensionHandler2, */ + /* ExtensionHandler2, */ // using expanded args + // Finally expand attributes to plain text ] ] ], @@ -132,10 +139,19 @@ ParserPipelineFactory.prototype.recipes = { [ // text/wiki-specific tokens QuoteTransformer, + ListHandler, + + // before transforms that depend on behavior switches + // examples: toc generation, edit sections + BehaviorSwitchHandler, + + // Synchronous extensions + Cite, // both before and after paragraph handler + + // Paragraph wrapping PostExpandParagraphHandler, - /* Cite, */ - /* ListHandler, */ Sanitizer + // SkipperUnpacker ] ], @@ -235,7 +251,7 @@ ParserPipelineFactory.prototype.returnPipeline = function ( type, pipe ) { pipe.removeAllListeners( 'end' ); pipe.removeAllListeners( 'chunk' ); var cache = this.pipelineCache[type]; - if ( cache.length < 5 ) { + if ( cache.length < 8 ) { cache.push( pipe ); } }; diff --git a/modules/parser/mediawiki.tokenizer.peg.js b/modules/parser/mediawiki.tokenizer.peg.js index 9d5c42573a..a429af53ec 100644 --- a/modules/parser/mediawiki.tokenizer.peg.js +++ b/modules/parser/mediawiki.tokenizer.peg.js @@ -76,7 +76,7 @@ PegTokenizer.prototype.process = function( text, cacheKey ) { //console.warn( JSON.stringify( maybeCached, null, 2 ) ); for ( var i = 0, l = maybeCached.length; i < l; i++ ) { // emit a clone of this chunk - this.emit('chunk', this.env.cloneTokens( maybeCached[i] )); + this.emit('chunk', maybeCached[i] ); } this.emit('end'); return; diff --git a/modules/parser/package.json b/modules/parser/package.json index 73e1d8f7db..802b36c28a 100644 --- a/modules/parser/package.json +++ b/modules/parser/package.json @@ -13,10 +13,10 @@ "assert": "0.x.x", "jsdom": "0.x.x", "pegjs": "0.x.x", - "lru-cache": "1.x.x" + "lru-cache": "1.x.x", + "async": "0.x.x" }, "devDependencies": { - "coffee-script": "1.x.x", "colors": "0.x.x", "diff": "1.x.x", "html5": "0.x.x" diff --git a/modules/parser/parse.js b/modules/parser/parse.js index f96a627036..c7411f008d 100644 --- a/modules/parser/parse.js +++ b/modules/parser/parse.js @@ -10,6 +10,7 @@ var ParserPipelineFactory = 
require('./mediawiki.parser.js').ParserPipelineFacto ParserEnv = require('./mediawiki.parser.environment.js').MWParserEnvironment, ConvertDOMToLM = require('./mediawiki.LinearModelConverter.js').ConvertDOMToLM, DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter, + WikitextSerializer = require('./mediawiki.WikitextSerializer.js').WikitextSerializer, optimist = require('optimist'); ( function() { @@ -29,6 +30,11 @@ var ParserPipelineFactory = require('./mediawiki.parser.js').ParserPipelineFacto 'boolean': true, 'default': false }, + 'wikitext': { + description: 'Output WikiText instead of HTML', + 'boolean': true, + 'default': false + }, 'debug': { description: 'Debug mode', 'boolean': true, @@ -118,6 +124,9 @@ var ParserPipelineFactory = require('./mediawiki.parser.js').ParserPipelineFacto null, 2 )); + } else if ( argv.wikitext ) { + new WikitextSerializer().serializeDOM( document.body, + process.stdout.write.bind( process.stdout ) ); } else { process.stdout.write( document.body.innerHTML ); } diff --git a/modules/parser/pegTokenizer.pegjs.txt b/modules/parser/pegTokenizer.pegjs.txt index 9536dbd32e..d9251c4fd1 100644 --- a/modules/parser/pegTokenizer.pegjs.txt +++ b/modules/parser/pegTokenizer.pegjs.txt @@ -131,143 +131,6 @@ }); }; - /* - * Annotate a token stream with list items with appropriate list tokens - * - * XXX: Move this to a token handler in phase sync23! That way we can - * support list items from templates too. - * - * @static - * @method - * @param {[tokens]} Token stream with li tokens - * @returns {[tokens]} Token stream, possibly with additional list tokens - * */ - var annotateList = function ( tokens ) { - var out = [], // List of tokens - bstack = [], // Bullet stack, previous element's listStyle - bnext = [], // Next element's listStyle - endtags = []; // Stack of end tags - - var commonPrefixLength = function (x, y) { - var minLength = Math.min(x.length, y.length); - for(var i = 0; i < minLength; i++) { - if (x[i] != y[i]) - break; - } - return i; - }; - - var pushList = function ( listName, itemName ) { - out.push( new TagTk( listName )); - out.push( new TagTk( itemName )); - endtags.push( new EndTagTk( listName )); - endtags.push( new EndTagTk( itemName )); - }; - - var popTags = function ( n ) { - for(;n > 0; n--) { - // push list item.. 
diff --git a/modules/parser/pegTokenizer.pegjs.txt b/modules/parser/pegTokenizer.pegjs.txt
index 9536dbd32e..d9251c4fd1 100644
--- a/modules/parser/pegTokenizer.pegjs.txt
+++ b/modules/parser/pegTokenizer.pegjs.txt
@@ -131,143 +131,6 @@
 		});
 	};
 
-	/*
-	 * Annotate a token stream with list items with appropriate list tokens
-	 *
-	 * XXX: Move this to a token handler in phase sync23! That way we can
-	 * support list items from templates too.
-	 *
-	 * @static
-	 * @method
-	 * @param {[tokens]} Token stream with li tokens
-	 * @returns {[tokens]} Token stream, possibly with additional list tokens
-	 * */
-	var annotateList = function ( tokens ) {
-		var out = [],     // List of tokens
-			bstack = [],  // Bullet stack, previous element's listStyle
-			bnext = [],   // Next element's listStyle
-			endtags = []; // Stack of end tags
-
-		var commonPrefixLength = function (x, y) {
-			var minLength = Math.min(x.length, y.length);
-			for(var i = 0; i < minLength; i++) {
-				if (x[i] != y[i])
-					break;
-			}
-			return i;
-		};
-
-		var pushList = function ( listName, itemName ) {
-			out.push( new TagTk( listName ));
-			out.push( new TagTk( itemName ));
-			endtags.push( new EndTagTk( listName ));
-			endtags.push( new EndTagTk( itemName ));
-		};
-
-		var popTags = function ( n ) {
-			for(;n > 0; n--) {
-				// push list item..
-				out.push(endtags.pop());
-				// and the list end tag
-				out.push(endtags.pop());
-			}
-		};
-
-		var isDlDd = function (a, b) {
-			var ab = [a,b].sort();
-			return (ab[0] === ':' && ab[1] === ';');
-		};
-
-		var doListItem = function ( bs, bn ) {
-			var prefixLen = commonPrefixLength (bs, bn);
-			var changeLen = Math.max(bs.length, bn.length) - prefixLen;
-			var prefix = bn.slice(0, prefixLen);
-			// emit close tag tokens for closed lists
-			if (changeLen === 0) {
-				var itemToken = endtags.pop();
-				out.push(itemToken);
-				out.push(new TagTk( itemToken.name ));
-				endtags.push(new EndTagTk( itemToken.name ));
-			} else if ( bs.length == bn.length
-						&& changeLen == 1
-						&& isDlDd( bs[prefixLen], bn[prefixLen] ) ) {
-				// handle dd/dt transitions
-				out.push(endtags.pop());
-				if( bn[prefixLen] == ';') {
-					var newName = 'dt';
-				} else {
-					var newName = 'dd';
-				}
-				out.push(new TagTk( newName ));
-				endtags.push(new EndTagTk( newName ));
-			} else {
-				popTags(bs.length - prefixLen);
-
-				if (prefixLen > 0 && bn.length == prefixLen ) {
-					var itemToken = endtags.pop();
-					out.push(itemToken);
-					out.push(new TagTk( itemToken.name ));
-					endtags.push(new EndTagTk( itemToken.name ));
-				}
-
-				for(var i = prefixLen; i < bn.length; i++) {
-					switch (bn[i]) {
-						case '*':
-							pushList('ul', 'li');
-							break;
-						case '#':
-							pushList('ol', 'li');
-							break;
-						case ';':
-							pushList('dl', 'dt');
-							break;
-						case ':':
-							pushList('dl', 'dd');
-							break;
-						default:
-							throw("Unknown node prefix " + prefix[i]);
-					}
-				}
-			}
-		};
-
-		for (var i = 0, length = tokens.length; i < length; i++) {
-			var token = tokens[i];
-			switch ( token.constructor ) {
-				case TagTk:
-					switch (token.name) {
-						case 'list':
-							// ignore token
-							break;
-						case 'listItem':
-							// convert listItem to list and list item tokens
-							bnext = token.bullets;
-							doListItem( bstack, bnext );
-							bstack = bnext;
-							break;
-						default:
-							// pass through all remaining start tags
-							out.push(token);
-							break;
-					}
-					break;
-				case EndTagTk:
-					if ( token.name == 'list' ) {
-						// pop all open list item tokens
-						popTags(bstack.length);
-						bstack = [];
-					} else {
-						out.push(token);
-					}
-					break;
-				default:
-					out.push(token);
-					break;
-			}
-		}
-		return out;
-	};
-
 
 	/**
 	 * Determine if a string represents a valid ISBN-10 or ISBN-13 identifier
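The grammar no longer builds nested list structure itself; the bullet-stack and common-prefix bookkeeping of the removed annotateList() above is what the new ListHandler token transform presumably takes over, which also lets list items produced by templates participate. A sketch of the core prefix comparison, with illustrative function names rather than the real ListHandler API:

// Sketch of the bullet-prefix bookkeeping the removed annotateList() performed and
// that a stream-based list handler presumably reimplements.
function commonPrefixLength( a, b ) {
	var n = Math.min( a.length, b.length ),
		i = 0;
	while ( i < n && a[i] === b[i] ) {
		i++;
	}
	return i;
}

// Given the previous item's bullets (e.g. '*#') and the next item's (e.g. '*:'),
// report how many nested lists to close and which bullet chars open new ones.
function listDelta( prevBullets, nextBullets ) {
	var keep = commonPrefixLength( prevBullets, nextBullets );
	return {
		close: prevBullets.length - keep,
		open: nextBullets.slice( keep ).split( '' ) // '*', '#', ';' or ':'
	};
}

// listDelta( '*#', '*:' ) -> { close: 1, open: [ ':' ] }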
@@ -702,7 +565,8 @@ extlink
 			new SelfclosingTagTk( 'extlink', [
 					new KV('href', target),
 					new KV('content', text)
-				] )
+				],
+				{ type: 'extlink' })
 		];
 	}
 	/ "[" & { return stops.pop('extlink'); }
@@ -837,13 +701,12 @@ ipv6_address
 * 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·}
 */
 tplarg_or_template
-  =
-  ! '{{{{{{{' (
-        & ( '{{{{{{' ([^}]+ / !'}}}' .)* '}}}}}}') tplarg
-      / & ( '{{{{{' ([^}]+ / !'}}}' .)* '}}}}}') template
-      / tplarg
-      / template
-  )
+  = & '{{{{{{{' '{' tplarg_or_template '}'
+    / & ( '{{{' &'{{{' tplarg ) tplarg
+    // tplarg in template
+    / & ( '{{' &'{{{' tplarg ) template
+    / tplarg
+    / template
 
 template
 	= "{{" nl_comment_space*
@@ -875,14 +738,14 @@ tplarg
 	params:(
 		nl_comment_space* '|' nl_comment_space*
 		r:(
-			&'}}' { return new KV( '', '') }
+			&'}}}' { return new KV( '', '') }
 			/ p:template_param { return p }
 		) { return r }
 	)*
 	nl_comment_space*
 	"}}}" {
 		name = flatten( name );
-		params.unshift( { k: '', v: name } );
+		params.unshift( new KV( name, '' ) );
 		var obj = new SelfclosingTagTk( 'templatearg', params );
 		//console.warn( 'tokenizer tplarg ' + JSON.stringify( obj, null, 2 ));
 		//console.warn('template arg @' + pos + '::' + input.substr(pos, 40) );
@@ -1012,7 +875,7 @@ wikilink
 			contentPos: lcontent.pos
 		};
 		// XXX: Point to object with path, revision and input information
-		obj.source = input;
+		//obj.source = input;
 		//console.warn('lcontent: ' + JSON.stringify( lcontent, null, 2 ) );
 
 		// Deal with content. XXX: Properly support pipe-trick etc
@@ -1240,7 +1103,7 @@ xmlish_tag = nowiki / generic_tag
 nowiki
 	= "<nowiki>" nc:nowiki_content "</nowiki>" {
 		//console.warn( 'full nowiki return: ' + pp(nc));
-		return nc;
+		return [ new TagTk( 'nowiki' ) ].concat( nc, [ new EndTagTk( 'nowiki' ) ] );
 	}
 	/ "<nowiki>" {
 		//console.warn('nowiki fallback');
@@ -1329,6 +1192,7 @@ generic_tag
 			res.dataAttribs = {};
 		}
 		res.dataAttribs.sourceTagPos = [tagStartPos - 1, pos];
+		res.dataAttribs.stx = 'html';
 		return res;
 }
 
@@ -1438,12 +1302,7 @@ block_tag
 /*********************************************************
 * Lists
 *********************************************************/
-lists = e:(dtdd / li) es:(sol (dtdd / li))*
-{
-	return annotateList( [ new TagTk( 'list' ) ]
-		.concat(flatten([e].concat(es))
-			,[ new EndTagTk( 'list' ) ]));
-}
+lists = (dtdd / li) (sol (dtdd / li))*
 
 li = bullets:list_char+
 	c:inlineline?
@@ -1457,7 +1316,7 @@ li = bullets:list_char+
 }
 
 dtdd
-	= bullets:(!(";" !list_char) list_char)*
+	= bullets:(!(";" !list_char) lc:list_char { return lc })*
 	";"
 	& {return stops.inc('colon');}
 	c:inlineline
@@ -1476,7 +1335,8 @@ dtdd
 		//	c[clen - 1].value = val.substr(0, val.length - 1) + "\u00a0";
 		//	}
 		//}
-
+
+		bullets = bullets.join('');
 		var li = new TagTk( 'listItem' );
 		li.bullets = bullets + ";";
 		var li2 = new TagTk( 'listItem' );
@@ -1571,7 +1431,7 @@ table_row_tag
 table_data_tags
 	= pipe
 	td:table_data_tag
-	tds:( pipe_pipe tdt:table_data_tag { return tdt } )* {
+	tds:( pipe_pipe tdt:table_data_tag { tdt[0].dataAttribs.t_stx = 'row'; return tdt } )* {
 		return td.concat(tds);
 	}
@@ -1595,7 +1455,7 @@ table_heading_tags =
 	//& { console.warn( 'th enter @' + input.substr(pos, 10)); return true; }
 	"!"
 	th:table_heading_tag
-	ths:( "!!" tht:table_heading_tag { return tht } )* {
+	ths:( "!!" tht:table_heading_tag { tht[0].dataAttribs.t_stx = 'row'; return tht } )* {
 		//console.warn( 'thts: ' + pp(th.concat(ths)));
 		return th.concat(ths);
 	}
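The `stx: 'html'` and `t_stx: 'row'` annotations added in the tokenizer hunks above record how a construct was written in the source, which is exactly the information a wikitext serializer needs for faithful round-tripping. A hedged sketch of how a serializer could consume them; the token shape and the handler are simplified and are not the actual WikitextSerializer code:

// Sketch: one way a serializer can consume the new stx/t_stx markers.
function startTagToWikitext( token ) {
	var da = token.dataAttribs || {};
	if ( da.stx === 'html' ) {
		// The tag was literal HTML in the source, so round-trip it as HTML.
		return '<' + token.name + '>';
	}
	if ( token.name === 'td' ) {
		// t_stx === 'row' marks cells that shared a source line: ||cell vs. |cell
		return da.t_stx === 'row' ? '||' : '\n|';
	}
	if ( token.name === 'th' ) {
		return da.t_stx === 'row' ? '!!' : '\n!';
	}
	// Anything not special-cased in this sketch falls back to HTML syntax.
	return '<' + token.name + '>';
}

// startTagToWikitext( { name: 'td', dataAttribs: { t_stx: 'row' } } ) -> '||'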
diff --git a/tests/parser/__patched-html5-parser.coffee b/tests/parser/__patched-html5-parser.coffee
deleted file mode 100644
index aae6bb9ae4..0000000000
--- a/tests/parser/__patched-html5-parser.coffee
+++ /dev/null
@@ -1,30 +0,0 @@
-module.exports = (HTML5) ->
-  htmlparser = new HTML5.Parser
-
-  htmlparser.tree.elementInActiveFormattingElements = (name) ->
-    els = @activeFormattingElements
-    i = els.length - 1
-    while i >= 0
-      break if els[i].type is HTML5.Marker.type
-      return els[i] if els[i].tagName.toLowerCase() is name
-      i--
-    return false
-
-  htmlparser.tree.reconstructActiveFormattingElements = ->
-    return if @activeFormattingElements.length is 0
-    i = @activeFormattingElements.length - 1
-    entry = @activeFormattingElements[i]
-    return if entry.type is HTML5.Marker.type or @open_elements.indexOf(entry) isnt -1
-    while entry.type isnt HTML5.Marker.type and @open_elements.indexOf(entry) is -1
-      i -= 1
-      entry = @activeFormattingElements[i]
-      break unless entry
-    loop
-      i += 1
-      clone = @activeFormattingElements[i].cloneNode()
-      element = @insert_element(clone.tagName, clone.attributes)
-      @activeFormattingElements[i] = element
-      break if element is @activeFormattingElements.last()
-    return
-
-  return htmlparser
diff --git a/tests/parser/parserTests-whitelist.js b/tests/parser/parserTests-whitelist.js
index 23efecdf4e..a37b196160 100644
--- a/tests/parser/parserTests-whitelist.js
+++ b/tests/parser/parserTests-whitelist.js
@@ -18,7 +18,7 @@ testWhiteList["Link containing double-single-quotes '' in text embedded in itali
 testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>";
 
 // This is a rare edge case, and the new behavior is arguably more consistent
-testWhiteList["5 quotes, code coverage +1 line"] = "<p>'<i></i></p>";
+testWhiteList["5 quotes, code coverage +1 line"] = "<p><i><b></b></i></p>";
 
 // The comment in the test already suggests this result as correct, but
 // supplies the old result without preformatting.
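The whitelist entry updated above records an alternative rendering that the test runner should accept for a known-divergent case. A sketch of the kind of lookup involved; the runner's actual checkResult() is not shown in this patch, so the function and argument names here are illustrative only:

// Sketch of how a whitelist entry is meant to be consumed.
var testWhiteList = require('./parserTests-whitelist.js').testWhiteList;

function acceptableOutputs( title, recordedResult ) {
	var outputs = [ recordedResult ];
	if ( testWhiteList.hasOwnProperty( title ) ) {
		// Accept the whitelisted rendering as an alternative pass.
		outputs.push( testWhiteList[ title ] );
	}
	return outputs;
}

// acceptableOutputs( "5 quotes, code coverage +1 line", "<p>'<i></i></p>" )
// would now also accept the "<p><i><b></b></i></p>" rendering set above.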
diff --git a/tests/parser/parserTests.js b/tests/parser/parserTests.js
index 54b771e26a..6fc20cc6b5 100644
--- a/tests/parser/parserTests.js
+++ b/tests/parser/parserTests.js
@@ -61,6 +61,7 @@ var testWhiteList = require(__dirname + '/parserTests-whitelist.js').testWhiteLi
 _import(pj('parser', 'mediawiki.parser.environment.js'), ['MWParserEnvironment']);
 _import(pj('parser', 'mediawiki.parser.js'), ['ParserPipelineFactory']);
+_import(pj('parser', 'mediawiki.WikitextSerializer.js'), ['WikitextSerializer']);
 
 // WikiDom and serializers
 //_require(pj('es', 'es.js'));
@@ -122,6 +123,11 @@ function ParserTests () {
 			'default': false,
 			'boolean': true
 		},
+		'roundtrip': {
+			description: 'Roundtrip testing: Wikitext -> DOM -> wikitext',
+			'default': false,
+			'boolean': true
+		},
 		'debug': {
 			description: 'Print debugging information',
 			'default': false,
@@ -181,12 +187,7 @@ function ParserTests () {
 
 	this.articles = {};
 
-	//this.htmlwindow = jsdom.jsdom(null, null, {parser: HTML5}).createWindow();
-	//this.htmlparser = new HTML5.Parser({document: this.htmlwindow.document});
-	//this.htmlparser = new HTML5.Parser()
-	// Use a patched version until https://github.com/aredridel/html5/issues/44 is merged
-	require('coffee-script');
-	this.htmlparser = require(__dirname+'/__patched-html5-parser')(HTML5);
+	this.htmlparser = new HTML5.Parser();
 
 	// Test statistics
 	this.passedTests = 0;
@@ -324,9 +325,9 @@ ParserTests.prototype.normalizeHTML = function (source) {
 // known-ok differences.
 ParserTests.prototype.normalizeOut = function ( out ) {
 	// TODO: Do not strip newlines in pre and nowiki blocks!
-	return out.replace(/[\r\n]| data-mw="[^">]*"/g, '')
+	return out.replace(/[\r\n]| (data-mw|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content)="[^">]*"/g, '')
 		.replace(/<!--.*?-->\n?/gm, '')
-		.replace(/<\/?meta[^>]*>/g, '');
+		.replace(/<\/?(meta|nowiki)[^>]*>/g, '');
 };
 
 ParserTests.prototype.formatHTML = function ( source ) {
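The widened normalizeOut() strip list removes the RDFa-style attributes and nowiki wrappers that the new pipeline emits but the recorded expected output does not contain. A self-contained copy of the new regexes, with a made-up sample to show the effect:

// Standalone copy of the widened normalization, with a made-up sample input.
function normalizeOut( out ) {
	return out.replace(/[\r\n]| (data-mw|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content)="[^">]*"/g, '')
		.replace(/<!--.*?-->\n?/gm, '')
		.replace(/<\/?(meta|nowiki)[^>]*>/g, '');
}

// normalizeOut( '<p about="#mwt1" typeof="mw:Object/Template"><nowiki>[[x]]</nowiki></p>' )
// -> '<p>[[x]]</p>'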
@@ -387,12 +388,17 @@ ParserTests.prototype.processResult = function ( index, item, doc ) {
 		this.failParseTests++;
 		console.log('PARSE FAIL', res.err);
 	} else {
-		// Check the result vs. the expected result.
-		this.checkResult( item, doc.body.innerHTML );
+		if (this.argv.roundtrip) {
+			var rt_wikiText = new WikitextSerializer().serializeDOM(doc.body);
+			this.checkRoundTripResult(item, rt_wikiText);
+		} else {
+			// Check the result vs. the expected result.
+			this.checkResult( item, doc.body.innerHTML );
 
-		if ( this.argv.wikidom ) {
-			// Test HTML DOM -> WikiDOM conversion
-			this.printWikiDom( parserPipeline.getWikiDom() );
+			if ( this.argv.wikidom ) {
+				// Test HTML DOM -> WikiDOM conversion
+				this.printWikiDom( parserPipeline.getWikiDom() );
+			}
 		}
 	}
 
@@ -470,6 +476,55 @@ ParserTests.prototype.checkResult = function ( item, out ) {
 	}
 };
 
+ParserTests.prototype.checkRoundTripResult = function ( item, out ) {
+	var normalizedOut = out; // FIXME: normalization not in place yet
+	var normalizedExpected = item.input; // FIXME: normalization not in place yet
+	if ( normalizedOut !== normalizedExpected ) {
+		this.printTitle( item, this.argv.quick );
+		this.failOutputTests++;
+
+		if( !this.argv.quick ) {
+			console.log('RAW EXPECTED'.cyan + ':');
+			console.log(item.input + "\n");
+
+			console.log('RAW RENDERED'.cyan + ':');
+			console.log(out + "\n");
+
+			console.log('NORMALIZED EXPECTED'.magenta + ':');
+			console.log(normalizedExpected + "\n");
+
+			console.log('NORMALIZED RENDERED'.magenta + ':');
+			console.log(normalizedOut + "\n");
+			var patch = jsDiff.createPatch('wikitext.txt', normalizedExpected, normalizedOut, 'before', 'after');
+
+			console.log('DIFF'.cyan +': ');
+
+			// Strip the header from the patch, we know how diffs work..
+			patch = patch.replace(/^[^\n]*\n[^\n]*\n[^\n]*\n[^\n]*\n/, '');
+
+			var colored_diff = patch.split( '\n' ).map( function(line) {
+				// Add some colors to diff output
+				switch( line.charAt(0) ) {
+					case '-':
+						return line.red;
+					case '+':
+						return line.blue;
+					default:
+						return line;
+				}
+			}).join( "\n" );
+
+
+			console.log( colored_diff );
+		}
+	} else {
+		this.passedTests++;
+		if( !this.argv.quiet ) {
+			console.log( 'PASSED'.green + ': ' + item.title.yellow );
+		}
+	}
+};
+
 
 /**
  * Print out a WikiDom conversion of the HTML DOM
@@ -587,9 +642,10 @@ ParserTests.prototype.processCase = function ( i ) {
 			break;
 		case 'hooks':
 			console.warn('parserTests: Unhandled hook ' + JSON.stringify( item ) );
+			break;
 		case 'functionhooks':
-			console.warn('parserTests: Unhandled functionhook '
-					+ JSON.stringify( item ) );
+			console.warn('parserTests: Unhandled functionhook ' + JSON.stringify( item ) );
+			break;
 		default:
			this.comments = [];
			process.nextTick( this.processCase.bind( this, i + 1 ) );