Merge branch 'master' of ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/VisualEditor into dmrewrite

This commit is contained in:
Rob Moen 2012-05-21 11:01:16 -07:00
commit 3d5da75782
27 changed files with 1817 additions and 1019 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
.svn
*~
*.kate-swp
.*.swp

View file

@ -289,10 +289,34 @@ $messages['ia'] = array(
); );
/** Italian (Italiano) /** Italian (Italiano)
* @author Beta16
* @author F. Cosoleto * @author F. Cosoleto
*/ */
$messages['it'] = array( $messages['it'] = array(
'visualeditor-feedback-prompt' => 'Lascia feedback', 'visualeditor' => 'VisualEditor',
'visualeditorsandbox' => 'Prova editor visivo',
'visualeditor-desc' => 'Editor visivo per MediaWiki',
'visualeditor-sandbox-title' => "Pagina delle prove per l'editor visivo",
'visualeditor-tooltip-wikitext' => 'Mostra/nascondi wikitesto',
'visualeditor-tooltip-json' => 'Mostra/nascondi JSON',
'visualeditor-tooltip-html' => 'Mostra/nascondi HTML',
'visualeditor-tooltip-render' => 'Mostra/nascondi anteprima',
'visualeditor-tooltip-history' => 'Mostra/nascondi azioni',
'visualeditor-tooltip-help' => 'Mostra/nascondi aiuto',
'visualeditor-feedback-prompt' => 'Lascia un commento',
'visualeditor-feedback-dialog-title' => "Lascia un commento sulla pagina delle prove per l'editor visivo",
);
/** Japanese (日本語)
* @author Shirayuki
*/
$messages['ja'] = array(
'visualeditor-tooltip-wikitext' => 'ウィキテキストの表示を切り替え',
'visualeditor-tooltip-json' => 'JSON 表示を切り替え',
'visualeditor-tooltip-html' => 'HTML 表示を切り替え',
'visualeditor-tooltip-render' => 'プレビューを切り替え',
'visualeditor-tooltip-help' => 'ヘルプ表示を切り替え',
'visualeditor-feedback-prompt' => 'フィードバックを残す',
); );
/** Luxembourgish (Lëtzebuergesch) /** Luxembourgish (Lëtzebuergesch)

View file

@ -1,153 +1,50 @@
var TokenCollector = require( './ext.util.TokenCollector.js' ).TokenCollector;
/** /**
* Simple token transform version of the Cite extension. * Simple token transform version of the Cite extension.
* *
* @class * @class
* @constructor * @constructor
*/ */
function Cite ( dispatcher ) { function Cite ( manager, isInclude ) {
this.manager = manager;
this.refGroups = {}; this.refGroups = {};
this.refTokens = []; // Set up the collector for ref sections
// Within ref block new TokenCollector(
this.isActive = false; manager,
this.register( dispatcher ); this.handleRef.bind(this),
true, // match the end-of-input if </ref> is missing
this.rank,
'tag',
'ref'
);
// And register for references tags
manager.addTransform( this.onReferences.bind(this),
this.referencesRank, 'tag', 'references' );
} }
/** Cite.prototype.rank = 2.15; // after QuoteTransformer, but before PostExpandParagraphHandler
* Register with dispatcher. Cite.prototype.referencesRank = 2.6; // after PostExpandParagraphHandler
* //Cite.prototype.rank = 2.6;
* @method
* @param {Object} TokenTransformDispatcher to register to
*/
Cite.prototype.register = function ( dispatcher ) {
// Register for ref and references tag tokens
var self = this;
this.onRefCB = function (ctx) {
return self.onRef(ctx);
};
dispatcher.appendListener( this.onRefCB, 'tag', 'ref' );
dispatcher.appendListener( function (ctx) {
return self.onReferences(ctx);
}, 'tag', 'references' );
dispatcher.appendListener( function (ctx) {
return self.onEnd(ctx);
}, 'end' );
};
/** /**
* Convert list of key-value pairs to object, with first entry for a * Handle ref section tokens collected by the TokenCollector.
* key winning.
*
* XXX: Move to general utils
*
* @static
* @method
* @param {Array} List of [key, value] pairs
* @returns {Object} Object with key/values set, first entry wins.
*/ */
Cite.prototype.attribsToObject = function ( attribs ) { Cite.prototype.handleRef = function ( tokens ) {
if ( attribs === undefined ) { // remove the first ref tag
return {}; var startTag = tokens.shift();
} if ( tokens[tokens.length - 1].name === 'ref' ) {
var obj = {}; tokens.pop();
for ( var i = 0, l = attribs.length; i < l; i++ ) {
var kv = attribs[i];
if (! kv[0] in obj) {
obj[kv[0]] = kv[1];
}
}
return obj;
};
/**
* Handle ref tag tokens.
*
* @method
* @param {Object} TokenContext
* @returns {Object} TokenContext
*/
Cite.prototype.onRef = function ( tokenCTX ) {
var refGroups = this.refGroups;
var getRefGroup = function(group) {
if (!(group in refGroups)) {
var refs = [],
byName = {};
refGroups[group] = {
refs: refs,
byName: byName,
add: function(tokens, options) {
var ref;
if (options.name && options.name in byName) {
ref = byName[options.name];
} else {
var n = refs.length;
var key = n + '';
if (options.name) {
key = options.name + '-' + key;
}
ref = {
tokens: tokens,
index: n,
groupIndex: n, // @fixme
name: options.name,
group: options.group,
key: key,
target: 'cite_note-' + key,
linkbacks: []
};
refs[n] = ref;
if (options.name) {
byName[options.name] = ref;
}
}
ref.linkbacks.push(
'cite_ref-' + ref.key + '-' + ref.linkbacks.length
);
return ref;
}
};
}
return refGroups[group];
};
var token = tokenCTX.token;
// Collect all tokens between ref start and endtag
if ( ! this.isActive &&
token.type === 'TAG' &&
token.name.toLowerCase() === 'ref' ) {
this.curRef = tokenCTX.token;
// Prepend self for 'any' token type
tokenCTX.dispatcher.prependListener(this.onRefCB, 'any' );
tokenCTX.token = null;
this.isActive = true;
return tokenCTX;
} else if ( this.isActive &&
// Also accept really broken ref close tags..
[TagTk, EndTagTk, SelfclosingTagTk].indexOf(token.constructor) >= 0 &&
token.name.toLowerCase() === 'ref'
)
{
this.isActive = false;
tokenCTX.dispatcher.removeListener(this.onRefCB, 'any' );
// fall through for further processing!
} else {
// Inside ref block: Collect all other tokens in refTokens and abort
//console.warn(JSON.stringify(tokenCTX.token, null, 2));
this.refTokens.push(tokenCTX.token);
tokenCTX.token = null;
return tokenCTX;
} }
var options = $.extend({ var options = $.extend({
name: null, name: null,
group: null group: null
}, this.attribsToObject(this.curRef.attribs)); }, this.manager.env.KVtoHash(startTag.attribs));
var group = getRefGroup(options.group); var group = this.getRefGroup(options.group);
var ref = group.add(this.refTokens, options); var ref = group.add(tokens, options);
this.refTokens = []; //console.warn( 'added tokens: ' + JSON.stringify( this.refGroups, null, 2 ));
var linkback = ref.linkbacks[ref.linkbacks.length - 1]; var linkback = ref.linkbacks[ref.linkbacks.length - 1];
@ -158,37 +55,26 @@ Cite.prototype.onRef = function ( tokenCTX ) {
//bits.push(env.formatNum( ref.groupIndex + 1 )); //bits.push(env.formatNum( ref.groupIndex + 1 ));
bits.push(ref.groupIndex + 1); bits.push(ref.groupIndex + 1);
tokenCTX.token = [ var res = [
{ new TagTk('span', [
type: 'TAG', new KV('id', linkback),
name: 'span', new KV('class', 'reference'),
attribs: [ // ignore element when serializing back to wikitext
['id', linkback], new KV('data-nosource', '')
['class', 'reference'], ]
// ignore element when serializing back to wikitext ),
['data-nosource', ''] new TagTk( 'a', [
] new KV('data-type', 'hashlink'),
}, new KV('href', '#' + ref.target)
{ // XXX: Add round-trip info here?
type: 'TAG', ]
name: 'a', ),
attribs: [ '[' + bits.join(' ') + ']',
['data-type', 'hashlink'], new EndTagTk( 'a' ),
['href', '#' + ref.target] new EndTagTk( 'span' )
// XXX: Add round-trip info here?
]
},
'[' + bits.join(' ') + ']',
{
type: 'ENDTAG',
name: 'a'
},
{
type: 'ENDTAG',
name: 'span'
}
]; ];
return tokenCTX; //console.warn( 'ref res: ' + JSON.stringify( res, null, 2 ) );
return { tokens: res };
}; };
/** /**
@ -198,107 +84,121 @@ Cite.prototype.onRef = function ( tokenCTX ) {
* @param {Object} TokenContext * @param {Object} TokenContext
* @returns {Object} TokenContext * @returns {Object} TokenContext
*/ */
Cite.prototype.onReferences = function ( tokenCTX ) { Cite.prototype.onReferences = function ( token, manager ) {
if ( token.constructor === EndTagTk ) {
return {};
}
//console.warn( 'references refGroups:' + JSON.stringify( this.refGroups, null, 2 ) );
var refGroups = this.refGroups; var refGroups = this.refGroups;
var arrow = '↑'; var arrow = '↑';
var renderLine = function( ref ) { var renderLine = function( ref ) {
//console.warn('reftokens: ' + JSON.stringify(ref.tokens, null, 2)); var out = [ new TagTk('li', [new KV('id', ref.target)] ) ];
var out = [{
type: 'TAG',
name: 'li',
attribs: [['id', ref.target]]
}];
if (ref.linkbacks.length == 1) { if (ref.linkbacks.length == 1) {
out = out.concat([ out = out.concat([
{ new TagTk( 'a', [
type: 'TAG', new KV('href', '#' + ref.linkbacks[0])
name: 'a', ]
attribs: [ ),
['data-type', 'hashlink'], arrow,
['href', '#' + ref.linkbacks[0]] new EndTagTk( 'a' )
]
},
{type: 'TEXT', value: arrow},
{type: 'ENDTAG', name: 'a'}
], ],
ref.tokens // The original content tokens ref.tokens // The original content tokens
); );
} else { } else {
out.content.push({type: 'TEXT', value: arrow}); out.push( arrow );
$.each(ref.linkbacks, function(i, linkback) { $.each(ref.linkbacks, function(i, linkback) {
out = out.concat([ out = out.concat([
{ new TagTk( 'a', [
type: 'TAG', new KV('data-type', 'hashlink'),
name: 'a', new KV('href', '#' + ref.linkbacks[0])
attribs: [
['data-type', 'hashlink'],
['href', '#' + ref.linkbacks[0]]
] ]
}, ),
// XXX: make formatNum available! // XXX: make formatNum available!
//{ //{
// type: 'TEXT', // type: 'TEXT',
// value: env.formatNum( ref.groupIndex + '.' + i) // value: env.formatNum( ref.groupIndex + '.' + i)
//}, //},
{type: 'TEXT', value: ref.groupIndex + '.' + i}, ref.groupIndex + '.' + i,
{type: 'ENDTAG', name: 'a'} new EndTagTk( 'a' )
], ],
ref.tokens // The original content tokens ref.tokens // The original content tokens
); );
}); });
} }
//console.warn( 'renderLine res: ' + JSON.stringify( out, null, 2 ));
return out; return out;
}; };
var token = tokenCTX.token; var res;
var options = $.extend({ var options = $.extend({
name: null, name: null,
group: null group: null
}, this.attribsToObject(token.attribs)); }, this.manager.env.KVtoHash(token.attribs));
if (options.group in refGroups) { if (options.group in refGroups) {
var group = refGroups[options.group]; var group = refGroups[options.group];
var listItems = $.map(group.refs, renderLine); var listItems = $.map(group.refs, renderLine);
tokenCTX.token = [ res = [
{ new TagTk( 'ol', [
type: 'TAG', new KV('class', 'references'),
name: 'ol', new KV('data-object', 'references') // Object type
attribs: [ ]
['class', 'references'], )
['data-object', 'references'] // Object type ].concat( listItems, [ new EndTagTk( 'ol' ) ] );
]
}
].concat( listItems, { type: 'ENDTAG', name: 'ol' } );
} else { } else {
tokenCTX.token = { res = [ new SelfclosingTagTk( 'meta', [ new KV('fixme', 'add-rdfa-rt-info') ] ) ];
type: 'SELFCLOSINGTAG',
name: 'placeholder',
attribs: [
['data-origNode', JSON.stringify(token)]
]
};
} }
return tokenCTX; res = res.map( this.manager.env.setTokenRank.bind( res, this.referencesRank ) );
//console.warn( 'references res: ' + JSON.stringify( res, null, 2 ) );
return { tokens: res };
}; };
/** Cite.prototype.getRefGroup = function(group) {
* Handle end token. var refGroups = this.refGroups;
* if (!(group in refGroups)) {
* @method var refs = [],
* @param {Object} TokenContext byName = {};
* @returns {Object} TokenContext refGroups[group] = {
*/ refs: refs,
Cite.prototype.onEnd = function ( tokenCTX ) { byName: byName,
// XXX: Emit error messages if references tag was missing! add: function(tokens, options) {
// Clean up var ref;
this.refGroups = {}; if (options.name && options.name in byName) {
this.refTokens = []; ref = byName[options.name];
this.isActive = false; } else {
return tokenCTX; var n = refs.length;
var key = n + '';
if (options.name) {
key = options.name + '-' + key;
}
ref = {
tokens: tokens,
index: n,
groupIndex: n, // @fixme
name: options.name,
group: options.group,
key: key,
target: 'cite_note-' + key,
linkbacks: []
};
refs[n] = ref;
if (options.name) {
byName[options.name] = ref;
}
}
ref.linkbacks.push(
'cite_ref-' + ref.key + '-' + ref.linkbacks.length
);
return ref;
}
};
}
return refGroups[group];
}; };
if (typeof module == "object") { if (typeof module == "object") {

View file

@ -36,6 +36,7 @@ AttributeExpander.prototype.onToken = function ( token, frame, cb ) {
token.constructor === SelfclosingTagTk) && token.constructor === SelfclosingTagTk) &&
token.attribs && token.attribs &&
token.attribs.length ) { token.attribs.length ) {
// clone the token
token = $.extend( {}, token ); token = $.extend( {}, token );
token.attribs = token.attribs.slice(); token.attribs = token.attribs.slice();
var atm = new AttributeTransformManager( var atm = new AttributeTransformManager(
@ -45,7 +46,7 @@ AttributeExpander.prototype.onToken = function ( token, frame, cb ) {
cb( { async: true } ); cb( { async: true } );
atm.process( token.attribs ); atm.process( token.attribs );
} else { } else {
cb ( { token: token } ); cb ( { tokens: [token] } );
} }
}; };
@ -58,7 +59,7 @@ AttributeExpander.prototype._returnAttributes = function ( token, cb,
{ {
this.manager.env.dp( 'AttributeExpander._returnAttributes: ',attributes ); this.manager.env.dp( 'AttributeExpander._returnAttributes: ',attributes );
token.attribs = attributes; token.attribs = attributes;
cb( { token: token } ); cb( { tokens: [token] } );
}; };
if (typeof module == "object") { if (typeof module == "object") {

View file

@ -6,7 +6,7 @@ function BehaviorSwitchHandler( manager, isInclude ) {
this.manager.addTransform( this.onBehaviorSwitch.bind( this ), this.rank, 'tag', 'behavior-switch' ); this.manager.addTransform( this.onBehaviorSwitch.bind( this ), this.rank, 'tag', 'behavior-switch' );
} }
BehaviorSwitchHandler.prototype.rank = 1.14; BehaviorSwitchHandler.prototype.rank = 2.14;
BehaviorSwitchHandler.prototype.onBehaviorSwitch = function ( token, manager, cb ) { BehaviorSwitchHandler.prototype.onBehaviorSwitch = function ( token, manager, cb ) {
var env = this.manager.env, var env = this.manager.env,

View file

@ -40,7 +40,15 @@ WikiLinkHandler.prototype.onWikiLink = function ( token, frame, cb ) {
// Check if page exists // Check if page exists
// //
//console.warn( 'title: ' + JSON.stringify( title ) ); //console.warn( 'title: ' + JSON.stringify( title ) );
var obj = new TagTk( 'a', [ new KV( 'href', title.makeLink() ) ] ), var obj = new TagTk( 'a',
[
new KV( 'href', title.makeLink() ),
new KV('typeof', 'http://mediawiki.org/rdf/wikilink'),
// Add resource as CURIE- needs global default prefix
// definition.
new KV('resource', '[:' + title.getPrefixedText() + ']')
]
),
content = token.attribs.slice(1, -1); content = token.attribs.slice(1, -1);
//console.warn('content: ' + JSON.stringify( content, null, 2 ) ); //console.warn('content: ' + JSON.stringify( content, null, 2 ) );
// XXX: handle trail // XXX: handle trail
@ -98,7 +106,8 @@ WikiLinkHandler.prototype._prefixImageOptions = {
'alt': 'alt', 'alt': 'alt',
'page': 'page', 'page': 'page',
'thumbnail': 'thumb', 'thumbnail': 'thumb',
'thumb': 'thumb' 'thumb': 'thumb',
'upright': 'aspect'
}; };
WikiLinkHandler.prototype.renderFile = function ( token, frame, cb, title ) { WikiLinkHandler.prototype.renderFile = function ( token, frame, cb, title ) {
@ -150,7 +159,7 @@ WikiLinkHandler.prototype.renderFile = function ( token, frame, cb, title ) {
} else { } else {
var bits = oText.split( '=', 2 ), var bits = oText.split( '=', 2 ),
key = this._prefixImageOptions[ bits[0].trim().toLowerCase() ]; key = this._prefixImageOptions[ bits[0].trim().toLowerCase() ];
if ( bits.length > 1 && key) { if ( bits[0] && key) {
oHash[key] = bits[1]; oHash[key] = bits[1];
options.push( new KV( key, bits[1] ) ); options.push( new KV( key, bits[1] ) );
//console.warn('handle prefix ' + bits ); //console.warn('handle prefix ' + bits );
@ -184,9 +193,8 @@ WikiLinkHandler.prototype.renderFile = function ( token, frame, cb, title ) {
a.dataAttribs = token.dataAttribs; a.dataAttribs = token.dataAttribs;
var width, height; var width, height;
if ( ! height in oHash && ! width in oHash ) { if ( ! oHash.height && ! oHash.width ) {
width = '120px'; width = '200px';
height = '120px';
} else { } else {
width = oHash.width; width = oHash.width;
height = oHash.height; height = oHash.height;
@ -212,7 +220,18 @@ WikiLinkHandler.prototype.renderThumb = function ( token, manager, cb, title, pa
a.dataAttribs.optionHash = oHash; a.dataAttribs.optionHash = oHash;
a.dataAttribs.optionList = options; a.dataAttribs.optionList = options;
var figurestyle = "width: 125px;", var width = 165;
// Handle upright
if ( 'aspect' in oHash ) {
if ( oHash.aspect > 0 ) {
width = width * oHash.aspect;
} else {
width *= 0.75;
}
}
var figurestyle = "width: " + (width + 5) + "px;",
figureclass = "thumb tright thumbinner"; figureclass = "thumb tright thumbinner";
// set horizontal alignment // set horizontal alignment
@ -243,6 +262,7 @@ WikiLinkHandler.prototype.renderThumb = function ( token, manager, cb, title, pa
new KV('class', figureclass), new KV('class', figureclass),
new KV('style', figurestyle), new KV('style', figurestyle),
new KV('typeof', 'http://mediawiki.org/rdf/Thumb'), new KV('typeof', 'http://mediawiki.org/rdf/Thumb'),
// XXX: define this globally?
new KV('prefix', "mw: http://mediawiki.org/rdf/terms/") new KV('prefix', "mw: http://mediawiki.org/rdf/terms/")
] ]
), ),
@ -257,11 +277,13 @@ WikiLinkHandler.prototype.renderThumb = function ( token, manager, cb, title, pa
'img', 'img',
[ [
new KV('src', path), new KV('src', path),
new KV('width', '120px'), new KV('width', width + 'px'),
new KV('height', '120px'), //new KV('height', '160px'),
new KV('class', 'thumbimage'), new KV('class', 'thumbimage'),
new KV('alt', oHash.alt || title.key ), new KV('alt', oHash.alt || title.key ),
new KV('resource', title.getPrefixedText()) // Add resource as CURIE- needs global default prefix
// definition.
new KV('resource', '[:' + title.getPrefixedText() + ']')
] ]
), ),
new EndTagTk( 'a' ), new EndTagTk( 'a' ),
@ -345,17 +367,25 @@ ExternalLinkHandler.prototype.onUrlLink = function ( token, frame, cb ) {
env.tokensToString( env.lookupKV( token.attribs, 'href' ).v ) env.tokensToString( env.lookupKV( token.attribs, 'href' ).v )
); );
if ( this._isImageLink( href ) ) { if ( this._isImageLink( href ) ) {
cb( { token: new SelfclosingTagTk( 'img', cb( { tokens: [ new SelfclosingTagTk( 'img',
[ [
new KV( 'src', href ), new KV( 'src', href ),
new KV( 'alt', href.split('/').last() ) new KV( 'alt', href.split('/').last() ),
] new KV('typeof', 'http://mediawiki.org/rdf/externalImage')
) ],
{ type: 'urllink' }
)
]
} ); } );
} else { } else {
cb( { cb( {
tokens: [ tokens: [
new TagTk( 'a', [ new KV( 'href', href ) ] ), new TagTk( 'a',
[
new KV( 'href', href ),
new KV('typeof', 'http://mediawiki.org/rdf/externalLink')
],
{ type: 'urllink' } ),
href, href,
new EndTagTk( 'a' ) new EndTagTk( 'a' )
] ]
@ -384,7 +414,8 @@ ExternalLinkHandler.prototype.onExtLink = function ( token, manager, cb ) {
[ [
new KV( 'src', src ), new KV( 'src', src ),
new KV( 'alt', src.split('/').last() ) new KV( 'alt', src.split('/').last() )
] ) ],
{ type: 'extlink' })
]; ];
} }
@ -393,7 +424,11 @@ ExternalLinkHandler.prototype.onExtLink = function ( token, manager, cb ) {
[ [
new TagTk ( 'a', new TagTk ( 'a',
[ new KV('href', href) ], [
new KV('href', href),
new KV('typeof', 'http://mediawiki.org/rdf/externalLink'),
new KV('property', 'http://mediawiki.org/rdf/terms/linkcontent')
],
token.dataAttribs token.dataAttribs
) )
].concat( content, [ new EndTagTk( 'a' )]) ].concat( content, [ new EndTagTk( 'a' )])

View file

@ -0,0 +1,150 @@
/*
 * Create list tag around list items and map wiki bullet levels to html
 */

/**
 * Token transform handler that converts 'listItem' tag tokens (carrying a
 * `bullets` sequence such as ['*', '#']) into nested list / list item
 * start and end tag tokens.
 *
 * @class
 * @constructor
 * @param {Object} manager Transform manager; addTransform and
 * removeTransform are called on it.
 */
function ListHandler ( manager ) {
	this.manager = manager;
	this.reset();
	this.manager.addTransform( this.onListItem.bind(this),
							this.listRank, 'tag', 'listItem' );
	this.manager.addTransform( this.onEnd.bind(this),
							this.listRank, 'end' );
}

ListHandler.prototype.listRank = 2.49; // before PostExpandParagraphHandler

// Maps each wiki bullet character to the list container tag and the list
// item tag it produces.
ListHandler.prototype.bulletCharsMap = {
	'*': { list: 'ul', item: 'li' },
	'#': { list: 'ol', item: 'li' },
	';': { list: 'dl', item: 'dt' },
	':': { list: 'dl', item: 'dd' }
};

/**
 * Reset all per-list-block state.
 */
ListHandler.prototype.reset = function() {
	this.newline = false; // flag to identify a list-less line that terminates
						// a list block
	this.bstack = []; // Bullet stack, previous element's listStyle
	this.endtags = []; // Stack of end tags
};

/**
 * Handle a newline token while a list is open. The second newline without
 * a list item in between closes all open lists.
 *
 * @param {Object} token The newline token, passed through in the result
 * @returns {Object} { tokens: [...] }
 */
ListHandler.prototype.onNewline = function ( token, frame, prevToken ) {
	var tokens = [token];
	if (this.newline) {
		// second newline without a list item in between, close the list
		tokens = this.end().concat( tokens );
	}
	this.newline = true;
	return { tokens: tokens };
};

/**
 * Handle the end token: close all open lists, then emit the end token.
 *
 * @param {Object} token The end token
 * @returns {Object} { tokens: [...] }
 */
ListHandler.prototype.onEnd = function( token, frame, prevToken ) {
	return { tokens: this.end().concat([token]) };
};

/**
 * Close every open list, reset the state and unregister the newline
 * transform.
 *
 * @returns {Array} End tag tokens for all open list items and lists
 */
ListHandler.prototype.end = function( ) {
	// pop all open list item tokens
	var tokens = this.popTags(this.bstack.length);
	this.reset();
	this.manager.removeTransform( this.listRank, 'newline' );
	return tokens;
};

/**
 * Handle a 'listItem' tag token. Start tags are converted to list and
 * list item tokens; any other token is returned unchanged.
 *
 * @param {Object} token The listItem token; token.bullets is read
 * @returns {Object} { tokens: [...] } for start tags, { token: token } otherwise
 */
ListHandler.prototype.onListItem = function ( token, frame, prevToken ) {
	if (token.constructor === TagTk){
		// convert listItem to list and list item tokens
		return { tokens: this.doListItem( this.bstack, token.bullets ) };
	}
	return { token: token };
};

/**
 * Length of the common prefix of two bullet sequences.
 *
 * @param {Array|String} x
 * @param {Array|String} y
 * @returns {Number}
 */
ListHandler.prototype.commonPrefixLength = function (x, y) {
	var minLength = Math.min(x.length, y.length),
		i = 0;
	while ( i < minLength && x[i] === y[i] ) {
		i++;
	}
	return i;
};

/**
 * Open a new list and its first item, remembering the matching end tags.
 *
 * @param {Object} container Entry of bulletCharsMap ({ list, item })
 * @returns {Array} Start tag tokens for the list and the list item
 */
ListHandler.prototype.pushList = function ( container ) {
	this.endtags.push( new EndTagTk( container.list ));
	this.endtags.push( new EndTagTk( container.item ));
	return [
		new TagTk( container.list ),
		new TagTk( container.item )
	];
};

/**
 * Close the n innermost list levels.
 *
 * @param {Number} n Number of levels (item + list pairs) to close
 * @returns {Array} The popped end tag tokens, innermost first
 */
ListHandler.prototype.popTags = function ( n ) {
	var tokens = [];
	for(;n > 0; n--) {
		// push list item..
		tokens.push(this.endtags.pop());
		// and the list end tag
		tokens.push(this.endtags.pop());
	}
	return tokens;
};

/**
 * Check whether two bullets are one ';' (dt) and one ':' (dd), in any order.
 *
 * @param {String} a
 * @param {String} b
 * @returns {Boolean}
 */
ListHandler.prototype.isDlDd = function (a, b) {
	var ab = [a,b].sort();
	return (ab[0] === ':' && ab[1] === ';');
};

/**
 * Compute the tag tokens needed to move from the previous bullet sequence
 * to the new one, and update the bullet and end tag stacks accordingly.
 *
 * @param {Array|String} bs Bullets of the previous list item
 * @param {Array|String} bn Bullets of the new list item
 * @returns {Array} Start / end tag tokens to emit
 * @throws {Error} If bn contains a character missing from bulletCharsMap
 */
ListHandler.prototype.doListItem = function ( bs, bn ) {
	var prefixLen = this.commonPrefixLength (bs, bn),
		changeLen = Math.max(bs.length, bn.length) - prefixLen,
		tokens, itemToken, newName, i;
	this.newline = false;
	this.bstack = bn;
	if (!bs.length) {
		// First item of a list block: start watching newlines so that the
		// block can be terminated.
		this.manager.addTransform( this.onNewline.bind(this),
								this.listRank, 'newline' );
	}

	if (changeLen === 0) {
		// Same bullets as before: close the previous item, open a new one
		// of the same kind.
		itemToken = this.endtags.pop();
		this.endtags.push(new EndTagTk( itemToken.name ));
		return [
			itemToken,
			new TagTk( itemToken.name )
		];
	}

	if ( bs.length === bn.length &&
			changeLen === 1 &&
			this.isDlDd( bs[prefixLen], bn[prefixLen] ) )
	{
		// handle dd/dt transitions: close the *previous* item (its end tag
		// is on top of the stack) and only then register the end tag of
		// the new item, so that stack and output stay in sync.
		newName = this.bulletCharsMap[bn[prefixLen]].item;
		itemToken = this.endtags.pop();
		this.endtags.push(new EndTagTk( newName ));
		return [
			itemToken,
			new TagTk( newName )
		];
	}

	// emit close tag tokens for closed lists
	tokens = this.popTags(bs.length - prefixLen);
	if (prefixLen > 0 && bn.length === prefixLen ) {
		// Returned to an outer level: start a new item there.
		itemToken = this.endtags.pop();
		tokens.push(itemToken);
		tokens.push(new TagTk( itemToken.name ));
		this.endtags.push(new EndTagTk( itemToken.name ));
	}
	for (i = prefixLen; i < bn.length; i++) {
		if (!this.bulletCharsMap[bn[i]]) {
			// Report the offending bullet itself (bn[i]); the shared prefix
			// never contains index i.
			throw new Error("Unknown node prefix " + bn[i]);
		}
		tokens = tokens.concat(this.pushList(this.bulletCharsMap[bn[i]]));
	}
	return tokens;
};

if (typeof module === "object") {
	module.exports.ListHandler = ListHandler;
}

View file

@ -44,7 +44,7 @@ OnlyInclude.prototype.onAnyInclude = function ( token, manager ) {
var res = this.accum; var res = this.accum;
res.push( token ); res.push( token );
this.accum = []; this.accum = [];
this.manager.setTokensRank( res, this.rank + 0.001 ); //this.manager.setTokensRank( res, this.rank + 0.001 );
return { tokens: res }; return { tokens: res };
} else { } else {
this.foundOnlyInclude = false; this.foundOnlyInclude = false;
@ -70,11 +70,11 @@ OnlyInclude.prototype.onAnyInclude = function ( token, manager ) {
meta = new TagTk( 'meta' ); meta = new TagTk( 'meta' );
meta.dataAttribs = { strippedTokens: [token] }; meta.dataAttribs = { strippedTokens: [token] };
} }
meta.rank = this.rank; //meta.rank = this.rank;
return { token: meta }; return { token: meta };
} else { } else {
if ( this.inOnlyInclude ) { if ( this.inOnlyInclude ) {
token.rank = this.rank; //token.rank = this.rank;
return { token: token }; return { token: token };
} else { } else {
this.accum.push( token ); this.accum.push( token );

View file

@ -17,6 +17,7 @@
* @author Gabriel Wicke <gwicke@wikimedia.org> * @author Gabriel Wicke <gwicke@wikimedia.org>
*/ */
var async = require('async');
function ParserFunctions ( manager ) { function ParserFunctions ( manager ) {
this.manager = manager; this.manager = manager;
@ -24,22 +25,52 @@ function ParserFunctions ( manager ) {
} }
// Temporary helper. // Temporary helper.
ParserFunctions.prototype._rejoinKV = function ( kv ) { ParserFunctions.prototype._rejoinKV = function ( k, v ) {
if ( kv.k && kv.k.length ) { if ( k.length ) {
return kv.k.concat( ['='], kv.v ); return k.concat( ['='], v );
} else { } else {
return kv.v; return v;
} }
}; };
// XXX: move to frame?
/**
 * Expand one key/value argument and pass the result to cb as a
 * { tokens: [...] } object.
 *
 * @param {Object|String|undefined} kv Argument to expand; either undefined,
 * a plain string, or an object with k and v members
 * @param {Function} cb Callback receiving the result object
 * @param {String} [defaultValue] Token to emit when kv is undefined
 * (falls back to the empty string)
 * @param {String} [type] Expansion type handed to kv.v.get; defaults to
 * 'tokens/x-mediawiki/expanded'
 */
ParserFunctions.prototype.expandKV = function ( kv, cb, defaultValue, type ) {
	if ( type === undefined ) {
		type = 'tokens/x-mediawiki/expanded';
	}
	if ( kv === undefined ) {
		cb( { tokens: [ defaultValue || '' ] } );
	} else if ( kv.constructor === String ) {
		// Wrap the plain string like every other branch does, so the
		// callback always receives a result object.
		return cb( { tokens: [ kv ] } );
	} else if ( kv.k.constructor === String && kv.v.constructor === String ) {
		// Both parts are already plain strings: rejoin them directly.
		if ( kv.k ) {
			cb( { tokens: [kv.k + '=' + kv.v] } );
		} else {
			cb( { tokens: [kv.v] } );
		}
	} else {
		// The value is not a plain string: request it through its get()
		// method and rejoin it with the key once it is delivered.
		var self = this,
			getCB = function ( v ) {
				cb ( { tokens:
					self._rejoinKV( kv.k, v ) } );
			};
		kv.v.get({
			type: type,
			cb: getCB,
			asyncCB: cb
		});
	}
};
ParserFunctions.prototype['pf_#if'] = function ( token, frame, cb, args ) { ParserFunctions.prototype['pf_#if'] = function ( token, frame, cb, args ) {
var target = args[0].k; var target = args[0].k;
if ( target.trim() !== '' ) { if ( target.trim() !== '' ) {
//this.env.dp('#if, first branch', target.trim(), argDict[1] ); //this.env.dp('#if, first branch', target.trim(), argDict[1] );
cb( { tokens: (args[1] && this._rejoinKV( args[1] ) || [] ) } ); this.expandKV( args[1], cb );
} else { } else {
//this.env.dp('#if, second branch', target.trim(), argDict[2] ); //this.env.dp('#if, second branch', target.trim(), argDict[2] );
cb( { tokens: (args[2] && this._rejoinKV( args[2] ) || [] ) } ); this.expandKV( args[2], cb );
} }
}; };
@ -51,16 +82,22 @@ ParserFunctions.prototype._switchLookupFallback = function ( frame, kvs, key, di
if ( v && key === v.trim() ) { if ( v && key === v.trim() ) {
// found. now look for the next entry with a non-empty key. // found. now look for the next entry with a non-empty key.
this.manager.env.dp( 'switch found' ); this.manager.env.dp( 'switch found' );
cb = function( res ) { cb ( { tokens: res } ); };
for ( var j = 0; j < l; j++) { for ( var j = 0; j < l; j++) {
kv = kvs[j]; kv = kvs[j];
// XXX: make sure the key is always one of these! // XXX: make sure the key is always one of these!
if ( kv.k.length ) { if ( kv.k.length ) {
return cb( { tokens: kv.v } ); return kv.v.get({
type: 'tokens/x-mediawiki/expanded',
cb: cb,
asyncCB: cb
});
} }
} }
// No value found, return empty string? XXX: check this // No value found, return empty string? XXX: check this
return cb( { } ); return cb( { } );
} else if ( kvs.length ) { } else if ( kvs.length ) {
// search for value-only entry which matches
var i = 0; var i = 0;
if ( v ) { if ( v ) {
i = 1; i = 1;
@ -68,30 +105,44 @@ ParserFunctions.prototype._switchLookupFallback = function ( frame, kvs, key, di
for ( ; i < l; i++ ) { for ( ; i < l; i++ ) {
kv = kvs[i]; kv = kvs[i];
if ( kv.k.length || !kv.v.length ) { if ( kv.k.length || !kv.v.length ) {
// skip entries with keys or empty values
continue; continue;
} else { } else {
if ( ! kv.v.to ) { if ( ! kv.v.get ) {
this.manager.env.ap( kv.v ); this.manager.env.ap( kv.v );
console.trace(); console.trace();
} }
return kv.v.to( 'text/plain/expanded', var self = this;
this._switchLookupFallback.bind( this, frame, kvs.slice(i), key, dict, cb ), cb({ async: true });
cb ); return kv.v.get( {
cb: process.nextTick.bind( process,
self._switchLookupFallback.bind( this, frame,
kvs.slice(i+1), key, dict, cb ) ),
asyncCB: cb
});
} }
} }
// value not found! // value not found!
if ( '#default' in dict ) { if ( '#default' in dict ) {
cb( { tokens: dict['#default'] } ); dict['#default'].get({
} else if ( kvs.length ) { type: 'tokens/x-mediawiki/expanded',
cb: function( res ) { cb ( { tokens: res } ); },
asyncCB: cb
});
/*} else if ( kvs.length ) {
var lastKV = kvs[kvs.length - 1]; var lastKV = kvs[kvs.length - 1];
if ( lastKV && ! lastKV.k.length ) { if ( lastKV && ! lastKV.k.length ) {
cb ( { tokens: lastKV.v } ); cb ( { tokens: lastKV.v } );
} else { } else {
cb ( {} ); cb ( {} );
} }*/
} else { } else {
// nothing found at all.
cb ( {} ); cb ( {} );
} }
} else {
// nothing found at all.
cb ( {} );
} }
}; };
@ -105,7 +156,11 @@ ParserFunctions.prototype['pf_#switch'] = function ( token, frame, cb, args ) {
var dict = args.dict(); var dict = args.dict();
if ( target && dict[target] !== undefined ) { if ( target && dict[target] !== undefined ) {
this.env.dp( 'switch found: ', target, dict, ' res=', dict[target] ); this.env.dp( 'switch found: ', target, dict, ' res=', dict[target] );
cb ( {tokens: dict[target] } ); dict[target].get({
type: 'tokens/x-mediawiki/expanded',
cb: function( res ) { cb ( { tokens: res } ); },
asyncCB: cb
});
} else { } else {
this._switchLookupFallback( frame, args, target, dict, cb ); this._switchLookupFallback( frame, args, target, dict, cb );
} }
@ -117,15 +172,15 @@ ParserFunctions.prototype['pf_#ifeq'] = function ( token, frame, cb, args ) {
cb( {} ); cb( {} );
} else { } else {
var b = args[1].v; var b = args[1].v;
b.to( 'text/plain/expanded', this._ifeq_worker.bind( this, cb, args ), cb ); b.get( { cb: this._ifeq_worker.bind( this, cb, args ), asyncCB: cb } );
} }
}; };
ParserFunctions.prototype._ifeq_worker = function ( cb, args, b ) { ParserFunctions.prototype._ifeq_worker = function ( cb, args, b ) {
if ( args[0].k.trim() === b.trim() ) { if ( args[0].k.trim() === b.trim() ) {
cb( { tokens: ( args[2] && this._rejoinKV( args[2] ) || [] ) } ); this.expandKV( args[2], cb );
} else { } else {
cb( { tokens: ( args[3] && this._rejoinKV( args[3] ) || [] ) } ); this.expandKV( args[3], cb );
} }
}; };
@ -162,18 +217,18 @@ ParserFunctions.prototype['pf_#ifexpr'] = function ( token, frame, cb, args ) {
} }
if ( res ) { if ( res ) {
cb( { tokens: args[1] && this._rejoinKV( args[1] ) || [] } ); this.expandKV( args[1], cb );
} else { } else {
cb( { tokens: args[2] && this._rejoinKV( args[2] ) || [] } ); this.expandKV( args[2], cb );
} }
}; };
ParserFunctions.prototype['pf_#iferror'] = function ( token, frame, cb, args ) { ParserFunctions.prototype['pf_#iferror'] = function ( token, frame, cb, args ) {
var target = args[0].k; var target = args[0].k;
if ( target.indexOf( 'class="error"' ) >= 0 ) { if ( target.indexOf( 'class="error"' ) >= 0 ) {
cb( { tokens: args[1] && args[1].v || [] } ); this.expandKV( args[1], cb );
} else { } else {
cb( { tokens: args[2] && args[2].v || [ target ] } ); this.expandKV( args[1], cb, target );
} }
}; };
@ -203,51 +258,82 @@ ParserFunctions.prototype.pf_lcfirst = function ( token, frame, cb, args ) {
cb( {} ); cb( {} );
} }
}; };
ParserFunctions.prototype.pf_padleft = function ( token, frame, cb, args ) { ParserFunctions.prototype.pf_padleft = function ( token, frame, cb, params ) {
var target = args[0].k, var target = params[0].k;
pad; if ( ! params[1] ) {
if ( args[1] && args[1].v > 0) { return cb( {} );
if ( args[2] && args[2].v ) {
pad = args[2].v;
} else {
pad = '0';
}
var n = args[1].v;
while ( target.length < n ) {
target = pad + target;
}
cb( { tokens: [target] } );
} else {
cb( {} );
} }
// expand parameters 1 and 2
params.getSlice( {
type: 'text/x-mediawiki/expanded',
cb: function ( args ) {
if ( args[0].v > 0) {
var pad = '0';
if ( args[1] && args[1].v !== '' ) {
pad = args[1].v;
}
var n = args[0].v;
while ( target.length < n ) {
target = pad + target;
}
cb( { tokens: [target] } );
} else {
self.env.dp( 'padleft no pad width', args );
cb( {} );
}
}
},
1, 3);
}; };
ParserFunctions.prototype.pf_padright = function ( token, frame, cb, args ) {
var target = args[0].k; ParserFunctions.prototype.pf_padright = function ( token, frame, cb, params ) {
if ( args[1] && args[1].v > 0) { var target = params[0].k;
if ( args[2] && args[2].v ) { if ( ! params[1] ) {
pad = args[2].v; return cb( {} );
} else {
pad = '0';
}
var n = args[1].v;
while ( target.length < n ) {
target = target + pad;
}
cb( { tokens: [target] } );
} else {
cb( {} );
} }
// expand parameters 1 and 2
params.getSlice( {
type: 'text/x-mediawiki/expanded',
cb: function ( args ) {
if ( args[0].v > 0) {
if ( args[1] && args[1].v !== '' ) {
pad = args[1].v;
} else {
pad = '0';
}
var n = args[0].v;
while ( target.length < n ) {
target = target + pad;
}
cb( { tokens: [target] } );
} else {
cb( {} );
}
}
},
1, 3 );
}; };
ParserFunctions.prototype['pf_#tag'] = function ( token, frame, cb, args ) { ParserFunctions.prototype['pf_#tag'] = function ( token, frame, cb, args ) {
// TODO: handle things like {{#tag:nowiki|{{{input1|[[shouldnotbelink]]}}}}} // TODO: handle things like {{#tag:nowiki|{{{input1|[[shouldnotbelink]]}}}}}
// https://www.mediawiki.org/wiki/Future/Parser_development#Token_stream_transforms // https://www.mediawiki.org/wiki/Future/Parser_development#Token_stream_transforms
var target = args[0].k; var target = args[0].k;
cb( { tokens: ( [ new TagTk( target ) ] args[1].v.get({
.concat( args[1].v, type: 'tokens/x-mediawiki/expanded',
[ new EndTagTk( target ) ] ) ) } ); cb: this.tag_worker.bind( this, target, cb ),
asyncCB: cb
});
}; };
ParserFunctions.prototype.tag_worker = function( target, cb, content ) {
cb({
tokens: [ new TagTk( target ) ]
.concat( content,
[ new EndTagTk( target ) ] )
});
};
// TODO: These are just quick wrappers for now, optimize! // TODO: These are just quick wrappers for now, optimize!
ParserFunctions.prototype.pf_currentyear = function ( token, frame, cb, args ) { ParserFunctions.prototype.pf_currentyear = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'Y', [], {} ) ); cb( this._pf_time_tokens( 'Y', [], {} ) );
@ -438,22 +524,30 @@ Date.replaceChars = {
}; };
ParserFunctions.prototype.pf_localurl = function ( token, frame, cb, args ) { ParserFunctions.prototype.pf_localurl = function ( token, frame, cb, args ) {
var target = args[0].k; var target = args[0].k,
env = this.env,
self = this;
args = args.slice(1); args = args.slice(1);
cb( { tokens: ( async.map(
'/' + args,
// FIXME! Figure out correct prefix to use function ( item, cb ) {
//this.env.wgScriptPath + self.expandKV( item, function ( res ) { cb( null, res.tokens ); }, '', 'text/x-mediawiki/expanded' );
'index' + },
this.env.wgScriptExtension + '?title=' + function ( err, expandedArgs ) {
this.env.normalizeTitle( target ) + '&' + if ( err ) {
args.map( console.trace();
function( kv ) { throw( err );
//console.warn( JSON.stringify( kv ) ); }
return (kv.v !== '' && kv.k + '=' + kv.v ) || kv.k; cb({ tokens: [ '/' +
} // FIXME! Figure out correct prefix to use
).join('&') //this.env.wgScriptPath +
) } ); 'index' +
env.wgScriptExtension + '?title=' +
env.normalizeTitle( target ) + '&' +
expandedArgs.join('&') ]
});
}
);
}; };
@ -503,7 +597,7 @@ ParserFunctions.prototype.pf_urlencode = function ( token, frame, cb, args ) {
// http://www.mediawiki.org/wiki/Wikitext_parser/Environment. // http://www.mediawiki.org/wiki/Wikitext_parser/Environment.
// There might be better solutions for some of these. // There might be better solutions for some of these.
ParserFunctions.prototype['pf_#ifexist'] = function ( token, frame, cb, args ) { ParserFunctions.prototype['pf_#ifexist'] = function ( token, frame, cb, args ) {
cb( { tokens: ( args[1] && args[1].v ) || [] } ); this.expandKV( args[1], cb );
}; };
ParserFunctions.prototype.pf_pagesize = function ( token, frame, cb, args ) { ParserFunctions.prototype.pf_pagesize = function ( token, frame, cb, args ) {
cb( { tokens: [ '100' ] } ); cb( { tokens: [ '100' ] } );

View file

@ -16,8 +16,8 @@ function PostExpandParagraphHandler ( dispatcher ) {
} }
// constants // constants
PostExpandParagraphHandler.prototype.newlineRank = 2.2; PostExpandParagraphHandler.prototype.newlineRank = 2.5;
PostExpandParagraphHandler.prototype.anyRank = 2.201; // Just after regular quote and newline PostExpandParagraphHandler.prototype.anyRank = 2.501; // Just after regular quote and newline
// Register this transformer with the TokenTransformer // Register this transformer with the TokenTransformer
@ -43,9 +43,6 @@ PostExpandParagraphHandler.prototype.reset = function ( token, frame, cb ) {
PostExpandParagraphHandler.prototype._finish = function ( ) { PostExpandParagraphHandler.prototype._finish = function ( ) {
var tokens = this.tokens; var tokens = this.tokens;
this.tokens = []; this.tokens = [];
for ( var i = 0, l = tokens.length; i < l; i++ ) {
tokens[ i ].rank = this.anyRank;
}
// remove 'any' registration // remove 'any' registration
this.dispatcher.removeTransform( this.anyRank, 'any' ); this.dispatcher.removeTransform( this.anyRank, 'any' );
this.newLines = 0; this.newLines = 0;

View file

@ -33,14 +33,9 @@ QuoteTransformer.prototype.reset = function ( ) {
// Register this transformer with the TokenTransformer // Register this transformer with the TokenTransformer
QuoteTransformer.prototype.register = function ( dispatcher ) { QuoteTransformer.prototype.register = function ( dispatcher ) {
this.dispatcher = dispatcher; this.dispatcher = dispatcher;
// Register for NEWLINE and QUOTE tag tokens // Register for QUOTE tag tokens
dispatcher.addTransform( this.onNewLine.bind(this),
this.quoteAndNewlineRank, 'newline' );
dispatcher.addTransform( this.onQuote.bind(this), dispatcher.addTransform( this.onQuote.bind(this),
this.quoteAndNewlineRank, 'tag', 'mw-quote' ); this.quoteAndNewlineRank, 'tag', 'mw-quote' );
// Treat end-of-input just the same as a newline
dispatcher.addTransform( this.onNewLine.bind(this),
this.quoteAndNewlineRank, 'end' );
}; };
// Make a copy of the token context // Make a copy of the token context
@ -69,6 +64,11 @@ QuoteTransformer.prototype.onQuote = function ( token, frame, prevToken ) {
if ( ! this.isActive ) { if ( ! this.isActive ) {
this.dispatcher.addTransform( this.onNewLine.bind(this),
this.quoteAndNewlineRank, 'newline' );
// Treat end-of-input just the same as a newline
this.dispatcher.addTransform( this.onNewLine.bind(this),
this.quoteAndNewlineRank, 'end' );
// register for any token if not yet active // register for any token if not yet active
this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'any' ); this.dispatcher.addTransform( this.onAny.bind(this), this.anyRank, 'any' );
this.isActive = true; this.isActive = true;
@ -137,7 +137,7 @@ QuoteTransformer.prototype.onNewLine = function ( token, frame, prevToken ) {
} }
token.rank = this.quoteAndNewlineRank; //token.rank = this.quoteAndNewlineRank;
//console.warn('chunks: ' + JSON.stringify( this.chunks, null, 2 ) ); //console.warn('chunks: ' + JSON.stringify( this.chunks, null, 2 ) );
@ -203,7 +203,9 @@ QuoteTransformer.prototype.onNewLine = function ( token, frame, prevToken ) {
// prepare for next line // prepare for next line
this.reset(); this.reset();
// remove 'any' registration // remove 'end', 'newline' and 'any' registrations
this.dispatcher.removeTransform( this.quoteAndNewlineRank, 'end' );
this.dispatcher.removeTransform( this.quoteAndNewlineRank, 'newline' );
this.dispatcher.removeTransform( this.anyRank, 'any' ); this.dispatcher.removeTransform( this.anyRank, 'any' );
return res; return res;

View file

@ -115,7 +115,7 @@ Sanitizer.prototype.onAny = function ( token ) {
} }
attribs[i] = new KV( k, v ); attribs[i] = new KV( k, v );
} }
//console.warn(JSON.stringify([attribs, token], null, 2)); //console.warn( 'sanitizer: ' + JSON.stringify([attribs, token], null, 2));
newToken.attribs = attribs; newToken.attribs = attribs;
token = newToken; token = newToken;
} }

View file

@ -52,12 +52,6 @@ TemplateHandler.prototype.onTemplate = function ( token, frame, cb ) {
//console.warn('onTemplate! ' + JSON.stringify( token, null, 2 ) + //console.warn('onTemplate! ' + JSON.stringify( token, null, 2 ) +
// ' args: ' + JSON.stringify( this.manager.args )); // ' args: ' + JSON.stringify( this.manager.args ));
// create a new temporary frame for argument and title expansions
// XXX: only expand keys, and leave value expansion to template parameter
// replacement or parser functions as needed!
// expand argument keys, with callback set to next processing step // expand argument keys, with callback set to next processing step
// XXX: would likely be faster to do this in a tight loop here // XXX: would likely be faster to do this in a tight loop here
var atm = new AttributeTransformManager( var atm = new AttributeTransformManager(
@ -77,7 +71,7 @@ TemplateHandler.prototype._nameArgs = function ( attribs ) {
for ( var i = 0, l = attribs.length; i < l; i++ ) { for ( var i = 0, l = attribs.length; i < l; i++ ) {
// FIXME: Also check for whitespace-only named args! // FIXME: Also check for whitespace-only named args!
if ( ! attribs[i].k.length ) { if ( ! attribs[i].k.length ) {
out.push( {k: [ n.toString() ], v: attribs[i].v } ); out.push( new KV( n.toString(), attribs[i].v ) );
n++; n++;
} else { } else {
out.push( attribs[i] ); out.push( attribs[i] );
@ -129,7 +123,7 @@ TemplateHandler.prototype._expandTemplate = function ( token, frame, cb, attribs
// 'unnamedArgs', tplExpandData.origToken.attribs, // 'unnamedArgs', tplExpandData.origToken.attribs,
// 'funcArg:', funcArg // 'funcArg:', funcArg
// ); // );
//this.manager.env.dp( 'entering prefix', funcArg, tplExpandData.expandedArgs ); this.manager.env.dp( 'entering prefix', target, token );
this.parserFunctions[ 'pf_' + prefix ] this.parserFunctions[ 'pf_' + prefix ]
( token, this.manager.frame, cb, pfAttribs ); ( token, this.manager.frame, cb, pfAttribs );
return; return;
@ -156,7 +150,8 @@ TemplateHandler.prototype._expandTemplate = function ( token, frame, cb, attribs
templateName, templateName,
new EndTagTk( 'a' ) new EndTagTk( 'a' )
]; ];
cb( { tokens: res, allTokensProcessed: true } ); res.rank = this.manager.phaseEndRank;
cb( { tokens: res } );
return; return;
} }
@ -192,7 +187,7 @@ TemplateHandler.prototype._processTemplateAndTitle = function( token, frame, cb,
pipeline.addListener( 'chunk', this._onChunk.bind ( this, cb ) ); pipeline.addListener( 'chunk', this._onChunk.bind ( this, cb ) );
pipeline.addListener( 'end', this._onEnd.bind ( this, cb ) ); pipeline.addListener( 'end', this._onEnd.bind ( this, cb ) );
// Feed the pipeline. XXX: Support different formats. // Feed the pipeline. XXX: Support different formats.
this.manager.env.dp( 'TemplateHandler._processTemplateAndTitle', name, src, attribs ); this.manager.env.dp( 'TemplateHandler._processTemplateAndTitle', name, attribs );
pipeline.process ( src, name ); pipeline.process ( src, name );
}; };
@ -201,7 +196,7 @@ TemplateHandler.prototype._processTemplateAndTitle = function( token, frame, cb,
*/ */
TemplateHandler.prototype._onChunk = function( cb, chunk ) { TemplateHandler.prototype._onChunk = function( cb, chunk ) {
// We encapsulate the output by default, so collect tokens here. // We encapsulate the output by default, so collect tokens here.
this.manager.env.stripEOFTkfromTokens( chunk ); chunk = this.manager.env.stripEOFTkfromTokens( chunk );
this.manager.env.dp( 'TemplateHandler._onChunk', chunk ); this.manager.env.dp( 'TemplateHandler._onChunk', chunk );
cb( { tokens: chunk, async: true } ); cb( { tokens: chunk, async: true } );
}; };
@ -234,7 +229,7 @@ TemplateHandler.prototype._fetchTemplateAndTitle = function ( title, parentCB, c
this.manager.env.dp( 'Note: trying to fetch ', title ); this.manager.env.dp( 'Note: trying to fetch ', title );
// Start a new request if none is outstanding // Start a new request if none is outstanding
this.manager.env.dp( 'requestQueue: ', this.manager.env.requestQueue ); //this.manager.env.dp( 'requestQueue: ', this.manager.env.requestQueue );
if ( this.manager.env.requestQueue[title] === undefined ) { if ( this.manager.env.requestQueue[title] === undefined ) {
this.manager.env.tp( 'Note: Starting new request for ' + title ); this.manager.env.tp( 'Note: Starting new request for ' + title );
this.manager.env.requestQueue[title] = new TemplateRequest( this.manager, title ); this.manager.env.requestQueue[title] = new TemplateRequest( this.manager, title );
@ -269,24 +264,29 @@ TemplateHandler.prototype.onTemplateArg = function ( token, frame, cb ) {
TemplateHandler.prototype._returnArgAttributes = function ( token, cb, frame, attributes ) { TemplateHandler.prototype._returnArgAttributes = function ( token, cb, frame, attributes ) {
//console.warn( '_returnArgAttributes: ' + JSON.stringify( attributes )); //console.warn( '_returnArgAttributes: ' + JSON.stringify( attributes ));
var argName = this.manager.env.tokensToString( attributes[0].v ).trim(), var argName = this.manager.env.tokensToString( attributes[0].k ).trim(),
res, res,
dict = this.manager.frame.args.named(); dict = this.manager.frame.args.named();
this.manager.env.dp( 'args', argName, dict ); this.manager.env.dp( 'args', argName /*, dict*/ );
if ( argName in dict ) { if ( argName in dict ) {
// return tokens for argument // return tokens for argument
//console.warn( 'templateArg found: ' + argName + //console.warn( 'templateArg found: ' + argName +
// ' vs. ' + JSON.stringify( this.manager.args ) ); // ' vs. ' + JSON.stringify( this.manager.args ) );
res = dict[argName]; res = dict[argName];
this.manager.env.dp( 'arg res:', res );
if ( res.constructor === String ) { if ( res.constructor === String ) {
cb( { tokens: [res] } ); cb( { tokens: [res] } );
} else { } else {
dict[argName].to('tokens/x-mediawiki/expanded', function(chunk) { cb ( { tokens: chunk } ); }); dict[argName].get({
type: 'tokens/x-mediawiki/expanded',
cb: function( res ) { cb ( { tokens: res } ); },
asyncCB: cb
});
} }
return; return;
} else { } else {
this.manager.env.dp( 'templateArg not found: ', argName, this.manager.env.dp( 'templateArg not found: ', argName
' vs. ', dict ); /*' vs. ', dict */ );
if ( attributes.length > 1 ) { if ( attributes.length > 1 ) {
res = attributes[1].v; res = attributes[1].v;
} else { } else {
@ -334,7 +334,7 @@ function TemplateRequest ( manager, title ) {
// Inherit from EventEmitter // Inherit from EventEmitter
TemplateRequest.prototype = new events.EventEmitter(); TemplateRequest.prototype = new events.EventEmitter();
TemplateHandler.prototype.constructor = TemplateRequest; TemplateRequest.prototype.constructor = TemplateRequest;
TemplateRequest.prototype._handler = function (error, response, body) { TemplateRequest.prototype._handler = function (error, response, body) {
//console.warn( 'response for ' + title + ' :' + body + ':' ); //console.warn( 'response for ' + title + ' :' + body + ':' );
@ -353,7 +353,7 @@ TemplateRequest.prototype._handler = function (error, response, body) {
} else if(response.statusCode == 200) { } else if(response.statusCode == 200) {
var src = '', var src = '',
data, data,
normalizedTitle; normalizedTitle;
try { try {
//console.warn( 'body: ' + body ); //console.warn( 'body: ' + body );
data = JSON.parse( body ); data = JSON.parse( body );
@ -375,8 +375,29 @@ TemplateRequest.prototype._handler = function (error, response, body) {
console.warn( 'Did not find page revisions in the returned body:' + body ); console.warn( 'Did not find page revisions in the returned body:' + body );
src = ''; src = '';
} }
// check for #REDIRECT
var redirMatch = src.match( /[\r\n\s]*#\s*redirect\s\[\[([^\]]+)\]\]/i )
if ( redirMatch ) {
var title = redirMatch[1];
var url = this.manager.env.wgScript + '/api' +
this.manager.env.wgScriptExtension +
'?' +
qs.stringify( {
format: 'json',
action: 'query',
prop: 'revisions',
rvprop: 'content',
titles: title
} );
//'?format=json&action=query&prop=revisions&rvprop=content&titles=' + title;
this.requestOptions.url = url;
request( this.requestOptions, this._handler.bind(this) );
return;
}
//console.warn( 'Page ' + title + ': got ' + src ); //console.warn( 'Page ' + title + ': got ' + src );
this.manager.env.tp( 'Retrieved ' + this.title ); this.manager.env.tp( 'Retrieved ' + this.title, src );
// Add the source to the cache // Add the source to the cache
this.manager.env.pageCache[this.title] = src; this.manager.env.pageCache[this.title] = src;
@ -387,7 +408,7 @@ TemplateRequest.prototype._handler = function (error, response, body) {
// //
var listeners = this.listeners( 'src' ); var listeners = this.listeners( 'src' );
var processSome = function () { var processSome = function () {
// XXX: experiment a bit with the number of callback per // XXX: experiment a bit with the number of callbacks per
// iteration! // iteration!
var maxIters = Math.min(1, listeners.length); var maxIters = Math.min(1, listeners.length);
for ( var it = 0; it < maxIters; it++ ) { for ( var it = 0; it < maxIters; it++ ) {
@ -408,9 +429,9 @@ TemplateRequest.prototype._handler = function (error, response, body) {
// XXX: handle other status codes // XXX: handle other status codes
// Remove self from request queue // Remove self from request queue
this.manager.env.dp( 'trying to remove ', this.title, ' from requestQueue' ); //this.manager.env.dp( 'trying to remove ', this.title, ' from requestQueue' );
delete this.manager.env.requestQueue[this.title]; delete this.manager.env.requestQueue[this.title];
this.manager.env.dp( 'after deletion:', this.manager.env.requestQueue ); //this.manager.env.dp( 'after deletion:', this.manager.env.requestQueue );
}; };
/* /*

View file

@ -54,7 +54,13 @@ TokenCollector.prototype._anyDelta = 0.00001;
* XXX: Adjust to sync phase callback when that is modified! * XXX: Adjust to sync phase callback when that is modified!
*/ */
TokenCollector.prototype._onDelimiterToken = function ( token, frame, cb ) { TokenCollector.prototype._onDelimiterToken = function ( token, frame, cb ) {
if ( this.isActive ) { if ( token.constructor === SelfclosingTagTk && !this.isActive ) {
this.manager.env.dp( 'skipping collection on ', token );
// just ignore it
return { tokens: [ token ] }; //this.transformation( [token, token] );
} else if ( this.isActive &&
( token.constructor === EndTagTk || token.constructor === EOFTk ) ) {
this.manager.env.dp( 'finishing collection on ', token );
var res; var res;
// finish processing // finish processing
this.tokens.push ( token ); this.tokens.push ( token );
@ -63,7 +69,7 @@ TokenCollector.prototype._onDelimiterToken = function ( token, frame, cb ) {
this.manager.removeTransform( this.rank, 'end' ); this.manager.removeTransform( this.rank, 'end' );
if ( token.constructor !== EOFTk || this.toEnd ) { if ( token.constructor !== EOFTk || this.toEnd ) {
// end token // end token
res = this.transformation ( this.tokens, this.cb, this.manager ); res = this.transformation ( this.tokens );
this.tokens = []; this.tokens = [];
// Transformation can be either sync or async, but receives all collected // Transformation can be either sync or async, but receives all collected
// tokens instead of a single token. // tokens instead of a single token.
@ -76,20 +82,27 @@ TokenCollector.prototype._onDelimiterToken = function ( token, frame, cb ) {
return { tokens: res }; return { tokens: res };
} }
} else if ( token.constructor !== EOFTk ) { } else if ( token.constructor !== EOFTk ) {
this.manager.env.dp( 'starting collection on ', token );
// start collection if ( this.isActive ) {
this.manager.env.dp( 'already active: ', token );
} else {
// start collection
this.manager.env.dp( 'starting collection on ', token );
this.manager.addTransform( this._onAnyToken.bind ( this ),
this.rank + this._anyDelta, 'any' );
this.manager.addTransform( this._onDelimiterToken.bind( this ),
this.rank, 'end' );
this.isActive = true;
}
this.tokens.push ( token ); this.tokens.push ( token );
this.manager.addTransform( this._onAnyToken.bind ( this ),
this.rank + this._anyDelta, 'any' );
this.manager.addTransform( this._onDelimiterToken.bind( this ),
this.rank, 'end' );
// Did not encounter a matching end token before the end, and are not // Did not encounter a matching end token before the end, and are not
// supposed to collect to the end. So just return the tokens verbatim. // supposed to collect to the end. So just return the tokens verbatim.
this.isActive = true;
return { }; return { };
} else { } else {
// pass through end token // pass through end token
return { token: token }; this.tokens = [];
this.isActive = false;
return { tokens: [ token ] };
} }
}; };

View file

@ -72,16 +72,17 @@ FauxHTML5.TreeBuilder.prototype._att = function (maybeAttribs) {
// Adapt the token format to internal HTML tree builder format, call the actual // Adapt the token format to internal HTML tree builder format, call the actual
// html tree builder by emitting the token. // html tree builder by emitting the token.
FauxHTML5.TreeBuilder.prototype.processToken = function (token) { FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
var attribs = token.attribs || [];
if ( token.dataAttribs ) { if ( token.dataAttribs ) {
if ( ! token.attribs ) { if ( ! token.attribs ) {
token.attribs = []; token.attribs = [];
} }
token.attribs.push( attribs = attribs.concat([
{ {
// Mediawiki-specific round-trip / non-semantic information // Mediawiki-specific round-trip / non-semantic information
k: 'data-mw', k: 'data-mw',
v: JSON.stringify( token.dataAttribs ) v: JSON.stringify( token.dataAttribs )
} ); } ] );
} }
switch( token.constructor ) { switch( token.constructor ) {
@ -93,24 +94,24 @@ FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
case TagTk: case TagTk:
this.emit('token', {type: 'StartTag', this.emit('token', {type: 'StartTag',
name: token.name, name: token.name,
data: this._att(token.attribs)}); data: this._att(attribs)});
break; break;
case SelfclosingTagTk: case SelfclosingTagTk:
this.emit('token', {type: 'StartTag', this.emit('token', {type: 'StartTag',
name: token.name, name: token.name,
data: this._att(token.attribs)}); data: this._att(attribs)});
if ( HTML5.VOID_ELEMENTS.indexOf( token.name.toLowerCase() ) < 0 ) { if ( HTML5.VOID_ELEMENTS.indexOf( token.name.toLowerCase() ) < 0 ) {
// VOID_ELEMENTS are automagically treated as self-closing by // VOID_ELEMENTS are automagically treated as self-closing by
// the tree builder // the tree builder
this.emit('token', {type: 'EndTag', this.emit('token', {type: 'EndTag',
name: token.name, name: token.name,
data: this._att(token.attribs)}); data: this._att(attribs)});
} }
break; break;
case EndTagTk: case EndTagTk:
this.emit('token', {type: 'EndTag', this.emit('token', {type: 'EndTag',
name: token.name, name: token.name,
data: this._att(token.attribs)}); data: this._att(attribs)});
break; break;
case CommentTk: case CommentTk:
this.emit('token', {type: 'Comment', this.emit('token', {type: 'Comment',

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,361 @@
/**
 * Serializer that turns a chunk of tokens or an HTML DOM into MediaWiki's
 * wikitext flavor.
 *
 * @class
 * @constructor
 * @param options {Object} Serialization options. They are copied into a new
 * object that is stored as `this.options`; a missing argument is treated as
 * an empty option set.
 */
WikitextSerializer = function( options ) {
	var overrides = options || {};
	this.options = $.extend( { /* defaults */ }, overrides );
};
var WSP = WikitextSerializer.prototype;
// Initial serializer state. serializeTokens and serializeDOM copy these
// properties (followed by the instance options) into a new state object with
// $.extend before each run.
WSP.defaultOptions = {
// Becomes true once a list item or paragraph start was serialized; a
// following <p> start then emits a blank line ("\n\n").
needParagraphLines: false,
// Stack of open lists: _listHandler pushes { itemCount, bullets } entries
// and _listEndHandler pops them.
// NOTE: this array literal lives on the prototype, so it is one single
// object shared by everything that copies these defaults shallowly.
listStack: [],
// Not read by any of the serializer code visible in this file.
lastHandler: null
};
// Build a handler that ignores whatever arguments it receives and always
// returns the captured value. Used for tags with a fixed wikitext equivalent.
var id = function( value ) {
	return function() {
		return value;
	};
};
/**
 * Start handler for list containers (bound with '*' for ul, '#' for ol and
 * '' for dl). Returns the wikitext to emit for the list start and pushes a
 * new entry onto state.listStack.
 *
 * @param bullet {String} Bullet character contributed by this list level
 * @param state {Object} Serializer state (listStack, prevToken)
 * @param token {Object} The list start token (unused)
 * @returns {String} Wikitext for the list start
 */
WSP._listHandler = function( bullet, state, token ) {
	var listStack = state.listStack,
		prefix, output;
	if ( listStack.length === 0 ) {
		// Outermost list: start on a new line
		prefix = "\n" + bullet;
		output = prefix;
	} else {
		var parent = listStack[listStack.length - 1];
		// Nested list: the full prefix is the parent's bullets plus ours
		prefix = parent.bullets + bullet;
		parent.itemCount++;
		// Only the own bullet is emitted while the parent's counter is still
		// low and (for a counter of 2) the previous token was the opening
		// <li>; in every other case the full prefix is written out.
		if ( ! ( parent.itemCount > 2 ) &&
				( ! ( parent.itemCount > 1 ) ||
					( state.prevToken.constructor === TagTk &&
						state.prevToken.name === 'li' ) ) ) {
			output = bullet;
		} else {
			output = prefix;
		}
	}
	listStack.push({ itemCount: 0, bullets: prefix});
	return output;
};
/**
 * End handler for list containers: drops the innermost entry from
 * state.listStack and emits no wikitext.
 */
WSP._listEndHandler = function( state, token ) {
	var listStack = state.listStack;
	listStack.pop();
	// FIXME: insert a newline after a list block is closed (the next token is
	// no list token).
	return '';
};
/**
 * Start handler for <li>. Marks that following paragraphs need separating
 * blank lines and returns the bullet prefix for every list item after the
 * first one counted on the current list level.
 */
WSP._listItemHandler = function ( state, token ) {
	//console.warn( JSON.stringify( state.listStack ) );
	var listStack = state.listStack;
	state.needParagraphLines = true;
	if ( listStack.length === 0 ) {
		// List item outside of any list: nothing to prefix
		return '';
	}
	var current = listStack[listStack.length - 1];
	current.itemCount++;
	if ( current.itemCount > 1 ) {
		// consecutive list items repeat the full bullet prefix
		return current.bullets;
	}
	return '';
};
/**
 * Serialize the start of a table-related tag.
 *
 * @param symbol {String} Wikitext symbol introducing the element
 * @param optionEndSymbol {String} Text appended after the attributes
 * @param state {Object} Serializer state (unused)
 * @param token {Object} Token whose attribs are serialized
 * @returns {String} The symbol alone when the token has no attributes,
 * otherwise symbol, a space, the serialized attributes and optionEndSymbol
 */
WSP._serializeTableTag = function ( symbol, optionEndSymbol, state, token ) {
	if ( ! token.attribs.length ) {
		return symbol;
	}
	var attribText = WSP._serializeAttributes( token.attribs );
	return symbol + ' ' + attribText + optionEndSymbol;
};
/**
 * Start handler for <a>: currently always opens an internal wiki link.
 */
WSP._linkHandler = function( state, token ) {
	// TODO: handle internal/external links etc using RDFa and dataAttribs
	// Also convert unannotated html links to external wiki links for html
	// import. Might want to consider converting relative links without path
	// component and file extension to wiki links.
	//if ( rtinfo.type === 'wikilink' ) {
	//	return '[[' + rtinfo.target + ']]';
	//} else {
	//	// external link
	//	return '[' + rtinfo.
	var opening = '[[';
	return opening;
};
/**
 * End handler for <a>: closes the internal wiki link opened by _linkHandler.
 */
WSP._linkEndHandler = function( state, token ) {
	var closing = ']]';
	return closing;
};
/**
 * Map from lower-case HTML tag name to its wikitext handlers. Each entry may
 * define `start` and / or `end`, both called as handler( state, token ) and
 * returning the wikitext string to emit. Tags mapped to an empty object are
 * known but produce no output of their own.
 */
WSP.tagToWikitext = {
	body: {},
	b: { start: id("'''"), end: id("'''") },
	i: { start: id("''"), end: id("''") },
	ul: {
		start: WSP._listHandler.bind( null, '*' ),
		end: WSP._listEndHandler
	},
	ol: {
		start: WSP._listHandler.bind( null, '#' ),
		end: WSP._listEndHandler
	},
	dl: {
		start: WSP._listHandler.bind( null, '' ),
		end: WSP._listEndHandler
	},
	li: { start: WSP._listItemHandler },
	// XXX: handle single-line vs. multi-line dls etc
	dt: { start: id(";") },
	dd: { start: id(":") },
	// XXX: handle options
	table: {
		start: WSP._serializeTableTag.bind(null, "\n{|", ''),
		end: id("\n|}")
	},
	tbody: {},
	th: {
		start: function ( state, token ) {
			// Tokens without round-trip data are treated as block syntax
			if ( token.dataAttribs && token.dataAttribs.t_stx === 'row' ) {
				return WSP._serializeTableTag("!!", ' |', state, token);
			} else {
				return WSP._serializeTableTag("\n!", ' |', state, token);
			}
		}
	},
	// XXX: omit for first row in table.
	tr: {
		start: function ( state, token ) {
			// prevToken is undefined when the row is the very first token
			var prev = state.prevToken;
			if ( prev && prev.constructor === TagTk && prev.name === 'tbody' ) {
				return '';
			} else {
				// Row attributes run to the end of the "|-" line; unlike
				// cells and captions they are not terminated by a pipe.
				return WSP._serializeTableTag("\n|-", '', state, token );
			}
		}
	},
	td: {
		start: function ( state, token ) {
			// Tokens without round-trip data are treated as block syntax
			if ( token.dataAttribs && token.dataAttribs.t_stx === 'row' ) {
				return WSP._serializeTableTag("||", ' |', state, token);
			} else {
				return WSP._serializeTableTag("\n|", ' |', state, token);
			}
		}
	},
	caption: { start: WSP._serializeTableTag.bind(null, "\n|+", ' |') },
	p: {
		start: function( state, token ) {
			// The first paragraph is emitted inline; later ones are
			// separated from the preceding content by a blank line.
			if (state.needParagraphLines) {
				return "\n\n";
			} else {
				state.needParagraphLines = true;
				return '';
			}
		}
	},
	hr: { start: id("\n----"), end: id("\n") },
	h1: { start: id("\n="), end: id("=\n") },
	h2: { start: id("\n=="), end: id("==\n") },
	h3: { start: id("\n==="), end: id("===\n") },
	h4: { start: id("\n===="), end: id("====\n") },
	h5: { start: id("\n====="), end: id("=====\n") },
	h6: { start: id("\n======"), end: id("======\n") },
	pre: { start: id("<pre>"), end: id("</pre>") },
	a: { start: WSP._linkHandler, end: WSP._linkEndHandler },
	nowiki: { start: id("<nowiki>"), end: id("</nowiki>") }
};
/**
 * Serialize a list of key-value attributes to `key="value"` pairs separated
 * by single spaces.
 *
 * @param attribs {Array} List of objects with string members `k` and `v`
 * @returns {String} The serialized attributes. Entries with an empty value
 * are emitted as the bare key, entries with an empty key as the bare value,
 * and entries with both empty are skipped.
 */
WSP._serializeAttributes = function ( attribs ) {
	var out = [];
	for ( var i = 0, l = attribs.length; i < l; i++ ) {
		var kv = attribs[i];
		if (kv.k.length) {
			if ( kv.v.length ) {
				// Escape every double quote: a string pattern passed to
				// replace() would only touch the first occurrence and leave
				// later quotes free to terminate the attribute value early.
				out.push( kv.k + '=' +
						'"' + kv.v.replace( /"/g, '&quot;' ) + '"');
			} else {
				out.push( kv.k );
			}
		} else if ( kv.v.length ) {
			// not very likely..
			out.push( kv.v );
		}
	}
	// XXX: round-trip optional whitespace / line breaks etc
	return out.join(' ');
};
/**
 * Chunk callback installed for the first chunk of a serialization run:
 * strips all leading newlines from that chunk, forwards the remainder to
 * state.realChunkCB and then makes realChunkCB the regular chunk callback.
 */
WSP._eatFirstNewLine = function ( state, chunk ) {
	var skip = 0;
	while ( chunk[skip] === '\n' ) {
		skip++;
	}
	if ( skip > 0 ) {
		chunk = chunk.substr( skip );
	}
	state.realChunkCB( chunk );
	state.chunkCB = state.realChunkCB;
};
/**
 * Serialize a chunk of tokens.
 *
 * @param tokens {Array} Tokens to serialize, in order
 * @param chunkCB {Function} Optional callback receiving each wikitext chunk
 * @returns {Array|undefined} Without a chunkCB, the array of emitted wikitext
 * chunks; with a chunkCB nothing is returned.
 */
WSP.serializeTokens = function( tokens, chunkCB ) {
	var state = $.extend({}, this.defaultOptions, this.options),
		collected;
	// $.extend copies shallowly, so without this the state would mutate the
	// one listStack array stored in defaultOptions, and a run with unbalanced
	// list tokens would leak open lists into every later run. A listStack
	// supplied through the options is deliberately left untouched.
	if ( state.listStack === this.defaultOptions.listStack ) {
		state.listStack = [];
	}
	// The first chunk has its leading newlines stripped
	state.chunkCB = this._eatFirstNewLine.bind( this, state );
	if ( chunkCB === undefined ) {
		collected = [];
		state.realChunkCB = collected.push.bind( collected );
	} else {
		state.realChunkCB = chunkCB;
	}
	for ( var i = 0, l = tokens.length; i < l; i++ ) {
		this._serializeToken( state, tokens[i] );
	}
	// Stays undefined when the caller supplied its own chunk callback
	return collected;
};
/**
 * Serialize a single token: records it as state.curToken (moving the former
 * one to state.prevToken) and passes the resulting wikitext, if any, to
 * state.chunkCB.
 */
WSP._serializeToken = function ( state, token ) {
	state.prevToken = state.curToken;
	state.curToken = token;
	var type = token.constructor,
		handler;
	if ( type === TagTk || type === SelfclosingTagTk ) {
		// Start and self-closing tags both use the start handler only
		handler = this.tagToWikitext[token.name];
		if ( handler && handler.start ) {
			state.chunkCB( handler.start( state, token ) );
		}
	} else if ( type === EndTagTk ) {
		handler = this.tagToWikitext[token.name];
		if ( handler && handler.end ) {
			state.chunkCB( handler.end( state, token ) );
		}
	} else if ( type === String ) {
		// Plain text passes through unchanged
		state.chunkCB( token );
	} else if ( type === CommentTk ) {
		state.chunkCB( '<!--' + token.value + '-->' );
	} else if ( type === NlTk ) {
		state.chunkCB( '\n' );
	} else if ( type === EOFTk ) {
		// End of input produces no output
	} else {
		console.warn( 'Unhandled token type ' + JSON.stringify( token ) );
	}
};
/**
 * Serialize an HTML DOM document.
 *
 * @param node {Node} Root of the DOM subtree to serialize
 * @param chunkCB {Function} Optional callback receiving each wikitext chunk
 * @returns {String|undefined} Without a chunkCB, the complete wikitext; with
 * a chunkCB nothing is returned.
 */
WSP.serializeDOM = function( node, chunkCB ) {
	var state = $.extend({}, this.defaultOptions, this.options);
	// $.extend copies shallowly, so without this the state would mutate the
	// one listStack array stored in defaultOptions and carry leftover list
	// nesting into later runs. A listStack supplied through the options is
	// deliberately left untouched.
	if ( state.listStack === this.defaultOptions.listStack ) {
		state.listStack = [];
	}
	// The first chunk has its leading newlines stripped
	state.chunkCB = this._eatFirstNewLine.bind( this, state );
	if ( ! chunkCB ) {
		var out = [];
		state.realChunkCB = out.push.bind( out );
		this._serializeDOM( node, state );
		return out.join('');
	} else {
		state.realChunkCB = chunkCB;
		this._serializeDOM( node, state );
	}
};
/**
 * Internal worker. Recursively serialize a DOM subtree by creating tokens and
 * calling _serializeToken on each of these.
 *
 * Elements without an entry in tagToWikitext are reported and skipped
 * together with their children.
 *
 * @param node {Node} DOM node to serialize
 * @param state {Object} Serializer state shared across the whole run
 */
WSP._serializeDOM = function( node, state ) {
	// serialize this node
	switch( node.nodeType ) {
		case Node.ELEMENT_NODE:
			//console.warn( node.nodeName.toLowerCase() );
			var children = node.childNodes,
				name = node.nodeName.toLowerCase(),
				handler = this.tagToWikitext[name];
			if ( handler ) {
				// Regular attributes and the round-trip info stored in
				// data-mw are passed to the tokens separately
				var tkAttribs = this._getDOMAttribs(node.attributes),
					tkRTInfo = this._getDOMRTInfo(node.attributes);
				this._serializeToken( state,
						new TagTk( name, tkAttribs, tkRTInfo ) );
				for ( var i = 0, l = children.length; i < l; i++ ) {
					// serialize all children
					this._serializeDOM( children[i], state );
				}
				this._serializeToken( state,
						new EndTagTk( name, tkAttribs, tkRTInfo ) );
			} else {
				console.warn( 'Unhandled element: ' + node.outerHTML );
			}
			break;
		case Node.TEXT_NODE:
			this._serializeToken( state, node.data );
			break;
		case Node.COMMENT_NODE:
			this._serializeToken( state, new CommentTk( node.data ) );
			break;
		default:
			// Only elements have outerHTML, so report the numeric node type
			// and node name instead of printing "undefined".
			console.warn( "Unhandled node type: " +
					node.nodeType + " (" + node.nodeName + ")" );
			break;
	}
};
/**
 * Convert a DOM attribute collection to a list of key-value pairs, leaving
 * out the 'data-mw' attribute.
 *
 * @param attribs {Object} Attribute collection providing length and item()
 * @returns {Array} List of { k: name, v: value } objects
 */
WSP._getDOMAttribs = function( attribs ) {
	var pairs = [],
		count = attribs.length;
	for ( var idx = 0; idx < count; idx++ ) {
		var domAttrib = attribs.item(idx);
		if ( domAttrib.name === 'data-mw' ) {
			// handled separately by _getDOMRTInfo
			continue;
		}
		pairs.push( { k: domAttrib.name, v: domAttrib.value } );
	}
	return pairs;
};
/**
 * Extract the round-trip info stored as JSON in the 'data-mw' attribute.
 *
 * @param attribs {Object} Attribute collection of a DOM element
 * @returns {Object} The parsed 'data-mw' value, or an empty object when the
 * attribute is missing or has an empty value
 */
WSP._getDOMRTInfo = function( attribs ) {
	var rtAttrib = attribs['data-mw'];
	if ( ! rtAttrib ) {
		return {};
	}
	return JSON.parse( rtAttrib.value || '{}' );
};
// Quick HACK: define Node constants locally
// https://developer.mozilla.org/en/nodeType
// Numeric nodeType values; _serializeDOM compares node.nodeType against
// ELEMENT_NODE, TEXT_NODE and COMMENT_NODE from this table.
var Node = {
ELEMENT_NODE: 1,
ATTRIBUTE_NODE: 2,
TEXT_NODE: 3,
CDATA_SECTION_NODE: 4,
ENTITY_REFERENCE_NODE: 5,
ENTITY_NODE: 6,
PROCESSING_INSTRUCTION_NODE: 7,
COMMENT_NODE: 8,
DOCUMENT_NODE: 9,
DOCUMENT_TYPE_NODE: 10,
DOCUMENT_FRAGMENT_NODE: 11,
NOTATION_NODE: 12
};
// Publish the serializer through module.exports when a module object exists
if ( typeof module === "object" ) {
	module.exports.WikitextSerializer = WikitextSerializer;
}

View file

@ -3,6 +3,8 @@
* strings or String objects (if attributes are needed). * strings or String objects (if attributes are needed).
*/ */
var async = require('async');
var toString = function() { return JSON.stringify( this ); }; var toString = function() { return JSON.stringify( this ); };
function TagTk( name, attribs, dataAttribs ) { function TagTk( name, attribs, dataAttribs ) {
@ -134,29 +136,115 @@ Params.prototype.named = function () {
return out; return out;
}; };
/**
 * Expand a slice of the parameters using the supplied get options.
 *
 * String values are passed on unchanged, an Array holding a single String
 * is replaced by that String, and every other value is expanded through its
 * own get() method. The resulting list of KVs is passed to options.cb.
 *
 * @param {Object} options get options; options.cb receives the list of KVs
 * @param {Number} start start index of the slice
 * @param {Number} end end index of the slice
 */
Params.prototype.getSlice = function ( options, start, end ) {
	var slice = this.slice( start, end ),
		done = options.cb;

	// Resolve a single KV and report it through the async-style callback
	var resolveKV = function ( kv, next ) {
		var value = kv.v;
		if ( value.constructor === String ) {
			// nothing to do
			next( null, kv );
			return;
		}
		if ( value.constructor === Array &&
				value.length === 1 && value[0].constructor === String ) {
			// remove String from Array
			next( null, new KV( kv.k, value[0] ) );
			return;
		}
		// Expand the value, using a copy of the options so that the
		// callback set here does not overwrite the caller's options.cb
		var expandOptions = $.extend( {}, options );
		expandOptions.cb = function ( expanded ) {
			// add in the key
			next( null, new KV( kv.k, expanded ) );
		};
		value.get( expandOptions );
	};

	// Hand the collected KVs to the caller, or bail out on error
	var finish = function ( err, results ) {
		if ( err ) {
			console.trace();
			throw JSON.stringify( err );
		}
		done( results );
	};

	async.map( slice, resolveKV, finish );
};
function ParamValue ( chunk, manager ) {
this.chunk = chunk;
this.manager = manager; /**
this.cache = {}; * A chunk. Wraps a source chunk of tokens with a reference to a frame for
* lazy and shared transformations. Do not use directly- use
* frame.newParserValue instead!
*/
function ParserValue ( source, frame ) {
if ( source.constructor === ParserValue ) {
Object.defineProperty( this, 'source',
{ value: source.source, enumerable: false } );
} else {
Object.defineProperty( this, 'source',
{ value: source, enumerable: false } );
}
Object.defineProperty( this, 'frame',
{ value: frame, enumerable: false } );
} }
ParamValue.prototype.expanded = function ( format, cb ) {
if ( format === tokens ) { ParserValue.prototype._defaultTransformOptions = {
if ( this.cache.tokens ) { type: 'text/x-mediawiki/expanded'
cb( this.cache.tokens ); };
ParserValue.prototype.toJSON = function() {
return this.source;
};
ParserValue.prototype.get = function( options, cb ) {
//console.trace();
if ( ! options ) {
options = $.extend({}, this._defaultTransformOptions);
} else if ( options.type === undefined ) {
options.type = this._defaultTransformOptions.type;
}
// convenience cb override for async-style functions that pass a cb as the
// last argument
if ( cb === undefined ) {
cb = options.cb;
}
// try the cache
var maybeCached = this.source.cache && this.source.cache.get( this.frame, options );
if ( maybeCached !== undefined ) {
if ( cb ) {
cb ( maybeCached );
} else { } else {
var pipeline = this.manager.pipeFactory.getPipeline( return maybeCached;
this.manager.attributeType || 'tokens/wiki', true
);
pipeline.setFrame( this.manager.frame, null );
} }
} else { } else {
throw "ParamValue.expanded: Unsupported format " + format; if ( ! options.cb ) {
console.trace();
throw "Chunk.get: Need to expand asynchronously, but no cb provided! " +
JSON.stringify( this, null, 2 );
}
options.cb = cb;
this.frame.expand( this.source, options );
} }
}; };
ParserValue.prototype.length = function () {
return this.source.length;
};
//Chunk.prototype.slice = function () {
// return this.source.slice.apply( this.source, arguments );
//};
// TODO: don't use globals!
if (typeof module == "object") { if (typeof module == "object") {
module.exports = {}; module.exports = {};
global.TagTk = TagTk; global.TagTk = TagTk;
@ -167,4 +255,5 @@ if (typeof module == "object") {
global.EOFTk = EOFTk; global.EOFTk = EOFTk;
global.KV = KV; global.KV = KV;
global.Params = Params; global.Params = Params;
global.ParserValue = ParserValue;
} }

View file

@ -33,7 +33,7 @@ MWParserEnvironment.prototype.lookupKV = function ( kvs, key ) {
var kv; var kv;
for ( var i = 0, l = kvs.length; i < l; i++ ) { for ( var i = 0, l = kvs.length; i < l; i++ ) {
kv = kvs[i]; kv = kvs[i];
if ( kv.k === key ) { if ( kv.k.trim() === key ) {
// found, return it. // found, return it.
return kv; return kv;
} }
@ -115,7 +115,7 @@ MWParserEnvironment.prototype.KVtoHash = function ( kvs ) {
return res; return res;
}; };
MWParserEnvironment.prototype.setTokenRank = function ( token, rank ) { MWParserEnvironment.prototype.setTokenRank = function ( rank, token ) {
// convert string literal to string object // convert string literal to string object
if ( token.constructor === String && token.rank === undefined ) { if ( token.constructor === String && token.rank === undefined ) {
token = new String( token ); token = new String( token );
@ -126,19 +126,25 @@ MWParserEnvironment.prototype.setTokenRank = function ( token, rank ) {
// Strip 'end' tokens and trailing newlines // Strip 'end' tokens and trailing newlines
MWParserEnvironment.prototype.stripEOFTkfromTokens = function ( tokens ) { MWParserEnvironment.prototype.stripEOFTkfromTokens = function ( tokens ) {
this.dp( 'stripping end or whitespace tokens', tokens ); this.dp( 'stripping end or whitespace tokens' );
if ( ! tokens.length ) { if ( ! tokens.length ) {
return tokens; return tokens;
} }
// Strip 'end' tokens and trailing newlines // Strip 'end' tokens and trailing newlines
var l = tokens[tokens.length - 1]; var l = tokens[tokens.length - 1];
while ( tokens.length && if ( l.constructor === EOFTk || l.constructor === NlTk ||
( l.constructor === EOFTk || l.constructor === NlTk ) ( l.constructor === String && l.match( /^\s+$/ ) ) ) {
) var origTokens = tokens;
{ tokens = origTokens.slice();
this.dp( 'stripping end or whitespace tokens' ); tokens.rank = origTokens.rank;
tokens.pop(); while ( tokens.length &&
l = tokens[tokens.length - 1]; (( l.constructor === EOFTk || l.constructor === NlTk ) ||
( l.constructor === String && l.match( /^\s+$/ ) ) ) )
{
this.dp( 'stripping end or whitespace tokens' );
tokens.pop();
l = tokens[tokens.length - 1];
}
} }
return tokens; return tokens;
}; };
@ -269,8 +275,7 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens, strict ) {
if ( token === undefined ) { if ( token === undefined ) {
if ( this.debug ) { console.trace(); } if ( this.debug ) { console.trace(); }
this.tp( 'MWParserEnvironment.tokensToString, invalid token: ' + this.tp( 'MWParserEnvironment.tokensToString, invalid token: ' +
JSON.stringify( token ) + token, ' tokens:', tokens);
' tokens:' + JSON.stringify( tokens, null, 2 ));
continue; continue;
} }
if ( token.constructor === String ) { if ( token.constructor === String ) {
@ -282,13 +287,13 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens, strict ) {
return [out.join(''), tokens.slice( i )]; return [out.join(''), tokens.slice( i )];
} }
var tstring = JSON.stringify( token ); var tstring = JSON.stringify( token );
this.dp ( 'MWParserEnvironment.tokensToString, non-text token: ' + this.dp ( 'MWParserEnvironment.tokensToString, non-text token: ',
tstring + JSON.stringify( tokens, null, 2 ) ); tstring, tokens);
if ( this.debug ) { console.trace(); } if ( this.debug ) { console.trace(); }
//out.push( tstring ); //out.push( tstring );
} }
} }
//console.warn( 'MWParserEnvironment.tokensToString result: ' + out.join('') ); this.dp( 'MWParserEnvironment.tokensToString result: ', out );
return out.join(''); return out.join('');
}; };
@ -359,6 +364,7 @@ MWParserEnvironment.prototype.dp = function ( ) {
try { try {
console.warn( JSON.stringify( arguments, null, 2 ) ); console.warn( JSON.stringify( arguments, null, 2 ) );
} catch ( e ) { } catch ( e ) {
console.trace();
console.warn( e ); console.warn( e );
} }
} else { } else {

View file

@ -34,6 +34,7 @@ var fs = require('fs'),
Sanitizer = require('./ext.core.Sanitizer.js').Sanitizer, Sanitizer = require('./ext.core.Sanitizer.js').Sanitizer,
TemplateHandler = require('./ext.core.TemplateHandler.js').TemplateHandler, TemplateHandler = require('./ext.core.TemplateHandler.js').TemplateHandler,
AttributeExpander = require('./ext.core.AttributeExpander.js').AttributeExpander, AttributeExpander = require('./ext.core.AttributeExpander.js').AttributeExpander,
ListHandler = require('./ext.core.ListHandler.js').ListHandler,
LinkHandler = require('./ext.core.LinkHandler.js'), LinkHandler = require('./ext.core.LinkHandler.js'),
WikiLinkHandler = LinkHandler.WikiLinkHandler, WikiLinkHandler = LinkHandler.WikiLinkHandler,
ExternalLinkHandler = LinkHandler.ExternalLinkHandler, ExternalLinkHandler = LinkHandler.ExternalLinkHandler,
@ -86,8 +87,7 @@ ParserPipelineFactory.prototype.recipes = {
[ [
OnlyInclude, OnlyInclude,
IncludeOnly, IncludeOnly,
NoInclude, NoInclude
BehaviorSwitchHandler
// Insert TokenCollectors for extensions here (don't expand // Insert TokenCollectors for extensions here (don't expand
// templates in extension contents); wrap collected tokens in // templates in extension contents); wrap collected tokens in
// special extension token. // special extension token.
@ -108,12 +108,19 @@ ParserPipelineFactory.prototype.recipes = {
[ 2, 'tokens/x-mediawiki' ], [ 2, 'tokens/x-mediawiki' ],
[ [
TemplateHandler, TemplateHandler,
/* ExtensionHandler1, */ // using SFH_OBJECT_ARGS in PHP
// Expand attributes after templates to avoid expanding unused branches // Expand attributes after templates to avoid expanding unused branches
// No expansion of quotes, paragraphs etc in attributes, as in
// PHP parser- up to text/x-mediawiki/expanded only.
AttributeExpander, AttributeExpander,
WikiLinkHandler,
// now all attributes expanded to tokens or string
WikiLinkHandler, // more convenient after attribute expansion
ExternalLinkHandler ExternalLinkHandler
/* ExtensionHandler1, */ /* ExtensionHandler2, */ // using expanded args
/* ExtensionHandler2, */ // Finally expand attributes to plain text
] ]
] ]
], ],
@ -132,10 +139,19 @@ ParserPipelineFactory.prototype.recipes = {
[ [
// text/wiki-specific tokens // text/wiki-specific tokens
QuoteTransformer, QuoteTransformer,
ListHandler,
// before transforms that depend on behavior switches
// examples: toc generation, edit sections
BehaviorSwitchHandler,
// Synchronous extensions
Cite, // both before and after paragraph handler
// Paragraph wrapping
PostExpandParagraphHandler, PostExpandParagraphHandler,
/* Cite, */
/* ListHandler, */
Sanitizer Sanitizer
// SkipperUnpacker
] ]
], ],
@ -235,7 +251,7 @@ ParserPipelineFactory.prototype.returnPipeline = function ( type, pipe ) {
pipe.removeAllListeners( 'end' ); pipe.removeAllListeners( 'end' );
pipe.removeAllListeners( 'chunk' ); pipe.removeAllListeners( 'chunk' );
var cache = this.pipelineCache[type]; var cache = this.pipelineCache[type];
if ( cache.length < 5 ) { if ( cache.length < 8 ) {
cache.push( pipe ); cache.push( pipe );
} }
}; };

View file

@ -76,7 +76,7 @@ PegTokenizer.prototype.process = function( text, cacheKey ) {
//console.warn( JSON.stringify( maybeCached, null, 2 ) ); //console.warn( JSON.stringify( maybeCached, null, 2 ) );
for ( var i = 0, l = maybeCached.length; i < l; i++ ) { for ( var i = 0, l = maybeCached.length; i < l; i++ ) {
// emit a clone of this chunk // emit a clone of this chunk
this.emit('chunk', this.env.cloneTokens( maybeCached[i] )); this.emit('chunk', maybeCached[i] );
} }
this.emit('end'); this.emit('end');
return; return;

View file

@ -13,10 +13,10 @@
"assert": "0.x.x", "assert": "0.x.x",
"jsdom": "0.x.x", "jsdom": "0.x.x",
"pegjs": "0.x.x", "pegjs": "0.x.x",
"lru-cache": "1.x.x" "lru-cache": "1.x.x",
"async": "0.x.x"
}, },
"devDependencies": { "devDependencies": {
"coffee-script": "1.x.x",
"colors": "0.x.x", "colors": "0.x.x",
"diff": "1.x.x", "diff": "1.x.x",
"html5": "0.x.x" "html5": "0.x.x"

View file

@ -10,6 +10,7 @@ var ParserPipelineFactory = require('./mediawiki.parser.js').ParserPipelineFacto
ParserEnv = require('./mediawiki.parser.environment.js').MWParserEnvironment, ParserEnv = require('./mediawiki.parser.environment.js').MWParserEnvironment,
ConvertDOMToLM = require('./mediawiki.LinearModelConverter.js').ConvertDOMToLM, ConvertDOMToLM = require('./mediawiki.LinearModelConverter.js').ConvertDOMToLM,
DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter, DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter,
WikitextSerializer = require('./mediawiki.WikitextSerializer.js').WikitextSerializer,
optimist = require('optimist'); optimist = require('optimist');
( function() { ( function() {
@ -29,6 +30,11 @@ var ParserPipelineFactory = require('./mediawiki.parser.js').ParserPipelineFacto
'boolean': true, 'boolean': true,
'default': false 'default': false
}, },
'wikitext': {
description: 'Output WikiText instead of HTML',
'boolean': true,
'default': false
},
'debug': { 'debug': {
description: 'Debug mode', description: 'Debug mode',
'boolean': true, 'boolean': true,
@ -118,6 +124,9 @@ var ParserPipelineFactory = require('./mediawiki.parser.js').ParserPipelineFacto
null, null,
2 2
)); ));
} else if ( argv.wikitext ) {
new WikitextSerializer().serializeDOM( document.body,
process.stdout.write.bind( process.stdout ) );
} else { } else {
process.stdout.write( document.body.innerHTML ); process.stdout.write( document.body.innerHTML );
} }

View file

@ -131,143 +131,6 @@
}); });
}; };
/*
 * Annotate a token stream with list items with appropriate list tokens
 *
 * The 'list' start tag is dropped, each 'listItem' start tag is replaced by
 * the list and item tags implied by the change of its bullets relative to
 * the previous item, and the 'list' end tag closes everything still open.
 * All other tokens are passed through unchanged.
 *
 * XXX: Move this to a token handler in phase sync23! That way we can
 * support list items from templates too.
 *
 * @static
 * @method
 * @param {Array} tokens Token stream with listItem tokens
 * @returns {Array} Token stream, possibly with additional list tokens
 * @throws {String} if a bullet is not one of '*', '#', ';' or ':'
 * */
var annotateList = function ( tokens ) {
	var out = [], // List of tokens
		bstack = [], // Bullet stack, previous element's listStyle
		bnext = [], // Next element's listStyle
		endtags = []; // Stack of end tags

	// Number of leading bullets shared by x and y
	var commonPrefixLength = function ( x, y ) {
		var minLength = Math.min( x.length, y.length ),
			i = 0;
		while ( i < minLength && x[i] === y[i] ) {
			i++;
		}
		return i;
	};

	// Open a list and its first item, and remember the matching end tags
	var pushList = function ( listName, itemName ) {
		out.push( new TagTk( listName ) );
		out.push( new TagTk( itemName ) );
		endtags.push( new EndTagTk( listName ) );
		endtags.push( new EndTagTk( itemName ) );
	};

	// Close the n innermost item / list pairs
	var popTags = function ( n ) {
		for ( ; n > 0; n-- ) {
			// push list item..
			out.push( endtags.pop() );
			// and the list end tag
			out.push( endtags.pop() );
		}
	};

	// Close the current item and open a new item of the same type
	var reopenItem = function () {
		var itemToken = endtags.pop();
		out.push( itemToken );
		out.push( new TagTk( itemToken.name ) );
		endtags.push( new EndTagTk( itemToken.name ) );
	};

	// True if a and b are one ';' and one ':', in either order
	var isDlDd = function ( a, b ) {
		var ab = [ a, b ].sort();
		return ( ab[0] === ':' && ab[1] === ';' );
	};

	// Emit the tags needed to get from the bullets bs of the previous item
	// to the bullets bn of the next item
	var doListItem = function ( bs, bn ) {
		var prefixLen = commonPrefixLength( bs, bn ),
			changeLen = Math.max( bs.length, bn.length ) - prefixLen,
			newName;

		if ( changeLen === 0 ) {
			// same bullets: just start the next item
			reopenItem();
		} else if ( bs.length === bn.length &&
				changeLen === 1 &&
				isDlDd( bs[prefixLen], bn[prefixLen] ) ) {
			// handle dd/dt transitions
			out.push( endtags.pop() );
			newName = bn[prefixLen] === ';' ? 'dt' : 'dd';
			out.push( new TagTk( newName ) );
			endtags.push( new EndTagTk( newName ) );
		} else {
			// emit close tag tokens for closed lists
			popTags( bs.length - prefixLen );

			if ( prefixLen > 0 && bn.length === prefixLen ) {
				// back in an enclosing list: start its next item
				reopenItem();
			}
			// open a list for each bullet beyond the common prefix
			for ( var i = prefixLen; i < bn.length; i++ ) {
				switch ( bn[i] ) {
					case '*':
						pushList( 'ul', 'li' );
						break;
					case '#':
						pushList( 'ol', 'li' );
						break;
					case ';':
						pushList( 'dl', 'dt' );
						break;
					case ':':
						pushList( 'dl', 'dd' );
						break;
					default:
						// report the offending bullet itself; the common
						// prefix never extends to index i
						throw "Unknown node prefix " + bn[i];
				}
			}
		}
	};

	for ( var i = 0, length = tokens.length; i < length; i++ ) {
		var token = tokens[i];
		switch ( token.constructor ) {
			case TagTk:
				switch ( token.name ) {
					case 'list':
						// ignore token
						break;
					case 'listItem':
						// convert listItem to list and list item tokens
						bnext = token.bullets;
						doListItem( bstack, bnext );
						bstack = bnext;
						break;
					default:
						// pass through all remaining start tags
						out.push( token );
						break;
				}
				break;
			case EndTagTk:
				if ( token.name === 'list' ) {
					// pop all open list item tokens
					popTags( bstack.length );
					bstack = [];
				} else {
					out.push( token );
				}
				break;
			default:
				out.push( token );
				break;
		}
	}
	return out;
};
/** /**
* Determine if a string represents a valid ISBN-10 or ISBN-13 identifier * Determine if a string represents a valid ISBN-10 or ISBN-13 identifier
@ -702,7 +565,8 @@ extlink
new SelfclosingTagTk( 'extlink', [ new SelfclosingTagTk( 'extlink', [
new KV('href', target), new KV('href', target),
new KV('content', text) new KV('content', text)
] ) ],
{ type: 'extlink' })
]; ];
} }
/ "[" & { return stops.pop('extlink'); } / "[" & { return stops.pop('extlink'); }
@ -837,13 +701,12 @@ ipv6_address
* 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·} * 7: {{{{{{{·}}}}}}} → {·{{{·{{{·}}}·}}}·}
*/ */
tplarg_or_template tplarg_or_template
= = & '{{{{{{{' '{' tplarg_or_template '}'
! '{{{{{{{' ( / & ( '{{{' &'{{{' tplarg ) tplarg
& ( '{{{{{{' ([^}]+ / !'}}}' .)* '}}}}}}') tplarg // tplarg in template
/ & ( '{{{{{' ([^}]+ / !'}}}' .)* '}}}}}') template / & ( '{{' &'{{{' tplarg ) template
/ tplarg / tplarg
/ template / template
)
template template
= "{{" nl_comment_space* = "{{" nl_comment_space*
@ -875,14 +738,14 @@ tplarg
params:( nl_comment_space* params:( nl_comment_space*
'|' nl_comment_space* '|' nl_comment_space*
r:( r:(
&'}}' { return new KV( '', '') } &'}}}' { return new KV( '', '') }
/ p:template_param { return p } / p:template_param { return p }
) { return r } ) { return r }
)* )*
nl_comment_space* nl_comment_space*
"}}}" { "}}}" {
name = flatten( name ); name = flatten( name );
params.unshift( { k: '', v: name } ); params.unshift( new KV( name, '' ) );
var obj = new SelfclosingTagTk( 'templatearg', params ); var obj = new SelfclosingTagTk( 'templatearg', params );
//console.warn( 'tokenizer tplarg ' + JSON.stringify( obj, null, 2 )); //console.warn( 'tokenizer tplarg ' + JSON.stringify( obj, null, 2 ));
//console.warn('template arg @' + pos + '::' + input.substr(pos, 40) ); //console.warn('template arg @' + pos + '::' + input.substr(pos, 40) );
@ -1012,7 +875,7 @@ wikilink
contentPos: lcontent.pos contentPos: lcontent.pos
}; };
// XXX: Point to object with path, revision and input information // XXX: Point to object with path, revision and input information
obj.source = input; //obj.source = input;
//console.warn('lcontent: ' + JSON.stringify( lcontent, null, 2 ) ); //console.warn('lcontent: ' + JSON.stringify( lcontent, null, 2 ) );
// Deal with content. XXX: Properly support pipe-trick etc // Deal with content. XXX: Properly support pipe-trick etc
@ -1240,7 +1103,7 @@ xmlish_tag = nowiki / generic_tag
nowiki nowiki
= "<nowiki>" nc:nowiki_content "</nowiki>" { = "<nowiki>" nc:nowiki_content "</nowiki>" {
//console.warn( 'full nowiki return: ' + pp(nc)); //console.warn( 'full nowiki return: ' + pp(nc));
return nc; return [ new TagTk( 'nowiki' ) ].concat( nc, [ new EndTagTk( 'nowiki' ) ] );
} }
/ "<nowiki>" { / "<nowiki>" {
//console.warn('nowiki fallback'); //console.warn('nowiki fallback');
@ -1329,6 +1192,7 @@ generic_tag
res.dataAttribs = {}; res.dataAttribs = {};
} }
res.dataAttribs.sourceTagPos = [tagStartPos - 1, pos]; res.dataAttribs.sourceTagPos = [tagStartPos - 1, pos];
res.dataAttribs.stx = 'html';
return res; return res;
} }
@ -1438,12 +1302,7 @@ block_tag
/********************************************************* /*********************************************************
* Lists * Lists
*********************************************************/ *********************************************************/
lists = e:(dtdd / li) es:(sol (dtdd / li))* lists = (dtdd / li) (sol (dtdd / li))*
{
return annotateList( [ new TagTk( 'list' ) ]
.concat(flatten([e].concat(es))
,[ new EndTagTk( 'list' ) ]));
}
li = bullets:list_char+ li = bullets:list_char+
c:inlineline? c:inlineline?
@ -1457,7 +1316,7 @@ li = bullets:list_char+
} }
dtdd dtdd
= bullets:(!(";" !list_char) list_char)* = bullets:(!(";" !list_char) lc:list_char { return lc })*
";" ";"
& {return stops.inc('colon');} & {return stops.inc('colon');}
c:inlineline c:inlineline
@ -1476,7 +1335,8 @@ dtdd
// c[clen - 1].value = val.substr(0, val.length - 1) + "\u00a0"; // c[clen - 1].value = val.substr(0, val.length - 1) + "\u00a0";
// } // }
//} //}
bullets = bullets.join('');
var li = new TagTk( 'listItem' ); var li = new TagTk( 'listItem' );
li.bullets = bullets + ";"; li.bullets = bullets + ";";
var li2 = new TagTk( 'listItem' ); var li2 = new TagTk( 'listItem' );
@ -1571,7 +1431,7 @@ table_row_tag
table_data_tags table_data_tags
= pipe = pipe
td:table_data_tag td:table_data_tag
tds:( pipe_pipe tdt:table_data_tag { return tdt } )* { tds:( pipe_pipe tdt:table_data_tag { tdt[0].dataAttribs.t_stx = 'row'; return tdt } )* {
return td.concat(tds); return td.concat(tds);
} }
@ -1595,7 +1455,7 @@ table_heading_tags
= //& { console.warn( 'th enter @' + input.substr(pos, 10)); return true; } = //& { console.warn( 'th enter @' + input.substr(pos, 10)); return true; }
"!" "!"
th:table_heading_tag th:table_heading_tag
ths:( "!!" tht:table_heading_tag { return tht } )* { ths:( "!!" tht:table_heading_tag { tht[0].dataAttribs.t_stx = 'row'; return tht } )* {
//console.warn( 'thts: ' + pp(th.concat(ths))); //console.warn( 'thts: ' + pp(th.concat(ths)));
return th.concat(ths); return th.concat(ths);
} }

View file

@ -1,30 +0,0 @@
module.exports = (HTML5) ->
htmlparser = new HTML5.Parser
htmlparser.tree.elementInActiveFormattingElements = (name) ->
els = @activeFormattingElements
i = els.length - 1
while i >= 0
break if els[i].type is HTML5.Marker.type
return els[i] if els[i].tagName.toLowerCase() is name
i--
return false
htmlparser.tree.reconstructActiveFormattingElements = ->
return if @activeFormattingElements.length is 0
i = @activeFormattingElements.length - 1
entry = @activeFormattingElements[i]
return if entry.type is HTML5.Marker.type or @open_elements.indexOf(entry) isnt -1
while entry.type isnt HTML5.Marker.type and @open_elements.indexOf(entry) is -1
i -= 1
entry = @activeFormattingElements[i]
break unless entry
loop
i += 1
clone = @activeFormattingElements[i].cloneNode()
element = @insert_element(clone.tagName, clone.attributes)
@activeFormattingElements[i] = element
break if element is @activeFormattingElements.last()
return
return htmlparser

View file

@ -18,7 +18,7 @@ testWhiteList["Link containing double-single-quotes '' in text embedded in itali
testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>"; testWhiteList["External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)"] = "<p><i>Some <a href=\"http://example.com/\">pretty </a></i><a href=\"http://example.com/\">italics<i> and stuff</i></a><i>!</i></p>";
// This is a rare edge case, and the new behavior is arguably more consistent // This is a rare edge case, and the new behavior is arguably more consistent
testWhiteList["5 quotes, code coverage +1 line"] = "<p>'<i></i></p>"; testWhiteList["5 quotes, code coverage +1 line"] = "<p><i><b></b></i></p>";
// The comment in the test already suggests this result as correct, but // The comment in the test already suggests this result as correct, but
// supplies the old result without preformatting. // supplies the old result without preformatting.

View file

@ -61,6 +61,7 @@ var testWhiteList = require(__dirname + '/parserTests-whitelist.js').testWhiteLi
_import(pj('parser', 'mediawiki.parser.environment.js'), ['MWParserEnvironment']); _import(pj('parser', 'mediawiki.parser.environment.js'), ['MWParserEnvironment']);
_import(pj('parser', 'mediawiki.parser.js'), ['ParserPipelineFactory']); _import(pj('parser', 'mediawiki.parser.js'), ['ParserPipelineFactory']);
_import(pj('parser', 'mediawiki.WikitextSerializer.js'), ['WikitextSerializer']);
// WikiDom and serializers // WikiDom and serializers
//_require(pj('es', 'es.js')); //_require(pj('es', 'es.js'));
@ -122,6 +123,11 @@ function ParserTests () {
'default': false, 'default': false,
'boolean': true 'boolean': true
}, },
'roundtrip': {
description: 'Roundtrip testing: Wikitext -> DOM -> wikitext',
'default': false,
'boolean': true
},
'debug': { 'debug': {
description: 'Print debugging information', description: 'Print debugging information',
'default': false, 'default': false,
@ -181,12 +187,7 @@ function ParserTests () {
this.articles = {}; this.articles = {};
//this.htmlwindow = jsdom.jsdom(null, null, {parser: HTML5}).createWindow(); this.htmlparser = new HTML5.Parser();
//this.htmlparser = new HTML5.Parser({document: this.htmlwindow.document});
//this.htmlparser = new HTML5.Parser()
// Use a patched version until https://github.com/aredridel/html5/issues/44 is merged
require('coffee-script');
this.htmlparser = require(__dirname+'/__patched-html5-parser')(HTML5);
// Test statistics // Test statistics
this.passedTests = 0; this.passedTests = 0;
@ -324,9 +325,9 @@ ParserTests.prototype.normalizeHTML = function (source) {
// known-ok differences. // known-ok differences.
ParserTests.prototype.normalizeOut = function ( out ) { ParserTests.prototype.normalizeOut = function ( out ) {
// TODO: Do not strip newlines in pre and nowiki blocks! // TODO: Do not strip newlines in pre and nowiki blocks!
return out.replace(/[\r\n]| data-mw="[^">]*"/g, '') return out.replace(/[\r\n]| (data-mw|typeof|resource|rel|prefix|about|rev|datatype|inlist|property|vocab|content)="[^">]*"/g, '')
.replace(/<!--.*?-->\n?/gm, '') .replace(/<!--.*?-->\n?/gm, '')
.replace(/<\/?meta[^>]*>/g, ''); .replace(/<\/?(meta|nowiki)[^>]*>/g, '');
}; };
ParserTests.prototype.formatHTML = function ( source ) { ParserTests.prototype.formatHTML = function ( source ) {
@ -387,12 +388,17 @@ ParserTests.prototype.processResult = function ( index, item, doc ) {
this.failParseTests++; this.failParseTests++;
console.log('PARSE FAIL', res.err); console.log('PARSE FAIL', res.err);
} else { } else {
// Check the result vs. the expected result. if (this.argv.roundtrip) {
this.checkResult( item, doc.body.innerHTML ); var rt_wikiText = new WikitextSerializer().serializeDOM(doc.body);
this.checkRoundTripResult(item, rt_wikiText);
} else {
// Check the result vs. the expected result.
this.checkResult( item, doc.body.innerHTML );
if ( this.argv.wikidom ) { if ( this.argv.wikidom ) {
// Test HTML DOM -> WikiDOM conversion // Test HTML DOM -> WikiDOM conversion
this.printWikiDom( parserPipeline.getWikiDom() ); this.printWikiDom( parserPipeline.getWikiDom() );
}
} }
} }
@ -470,6 +476,55 @@ ParserTests.prototype.checkResult = function ( item, out ) {
} }
}; };
/**
 * Compare round-tripped wikitext against the wikitext input of a test item
 * and update the pass / fail counters accordingly.
 *
 * On a mismatch the title is printed and, unless the 'quick' option is set,
 * the raw and normalized texts followed by a colored diff. On a match a
 * PASSED line is printed unless the 'quiet' option is set.
 *
 * @param {Object} item test item; item.input is the expected wikitext and
 * item.title the test title
 * @param {String} out the serialized wikitext to check
 */
ParserTests.prototype.checkRoundTripResult = function ( item, out ) {
	// FIXME: normalization not in place yet, both sides are compared as-is
	var expected = item.input,
		actual = out;

	if ( actual === expected ) {
		this.passedTests++;
		if ( !this.argv.quiet ) {
			console.log( 'PASSED'.green + ': ' + item.title.yellow );
		}
		return;
	}

	this.printTitle( item, this.argv.quick );
	this.failOutputTests++;
	if ( this.argv.quick ) {
		return;
	}

	console.log( 'RAW EXPECTED'.cyan + ':' );
	console.log( item.input + "\n" );

	console.log( 'RAW RENDERED'.cyan + ':' );
	console.log( out + "\n" );

	console.log( 'NORMALIZED EXPECTED'.magenta + ':' );
	console.log( expected + "\n" );

	console.log( 'NORMALIZED RENDERED'.magenta + ':' );
	console.log( actual + "\n" );

	var patch = jsDiff.createPatch( 'wikitext.txt', expected, actual, 'before', 'after' );

	console.log( 'DIFF'.cyan +': ' );

	var coloredLines = patch
		// Strip the header from the patch, we know how diffs work..
		.replace( /^[^\n]*\n[^\n]*\n[^\n]*\n[^\n]*\n/, '' )
		.split( '\n' )
		.map( function ( line ) {
			// Add some colors to diff output
			var marker = line.charAt( 0 );
			if ( marker === '-' ) {
				return line.red;
			}
			if ( marker === '+' ) {
				return line.blue;
			}
			return line;
		} );

	console.log( coloredLines.join( "\n" ) );
};
/** /**
* Print out a WikiDom conversion of the HTML DOM * Print out a WikiDom conversion of the HTML DOM
@ -587,9 +642,10 @@ ParserTests.prototype.processCase = function ( i ) {
break; break;
case 'hooks': case 'hooks':
console.warn('parserTests: Unhandled hook ' + JSON.stringify( item ) ); console.warn('parserTests: Unhandled hook ' + JSON.stringify( item ) );
break;
case 'functionhooks': case 'functionhooks':
console.warn('parserTests: Unhandled functionhook ' console.warn('parserTests: Unhandled functionhook ' + JSON.stringify( item ) );
+ JSON.stringify( item ) ); break;
default: default:
this.comments = []; this.comments = [];
process.nextTick( this.processCase.bind( this, i + 1 ) ); process.nextTick( this.processCase.bind( this, i + 1 ) );