/**
 * Simple link handler. Registers after template expansions, as an
 * asynchronous transform.
 *
 * @author Gabriel Wicke <gwicke@wikimedia.org>
 *
 * TODO: keep round-trip information in meta tag or the like
 */
var jshashes = require('jshashes'),
    PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer;
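// Note: the token constructors used below (TagTk, EndTagTk, SelfclosingTagTk,
// KV) are not required here; this module assumes they are defined elsewhere in
// the parser environment, as is the Array .last() helper used further down.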

function WikiLinkHandler( manager, isInclude ) {
    this.manager = manager;
    this.manager.addTransform( this.onWikiLink.bind( this ), this.rank, 'tag', 'wikilink' );
    // create a new peg parser for image options..
    if ( !this.imageParser ) {
        // Actually the regular tokenizer, but we'll call it with the
        // img_options production only.
        WikiLinkHandler.prototype.imageParser = new PegTokenizer();
    }
}
WikiLinkHandler.prototype.rank = 1.15; // after AttributeExpander
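
/*
 * Usage sketch (illustrative only; assumes a token transform manager with the
 * addTransform()/env interface used in this file, e.g. the async token
 * transform manager of this parser):
 *
 *   var handler = new WikiLinkHandler( manager, isInclude );
 *   // The manager then invokes handler.onWikiLink( token, frame, cb ) for
 *   // each 'wikilink' tag token and receives { tokens: [ ... ] } via cb.
 */
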
WikiLinkHandler.prototype.onWikiLink = function ( token, frame, cb ) {
    var env = this.manager.env,
        href = token.attribs[0].v;
    var title = this.manager.env.makeTitleFromPrefixedText(
            env.tokensToString( href )
        );

    if ( title.ns.isFile() ) {
        cb( this.renderFile( token, frame, cb, title ) );
    } else if ( title.ns.isCategory() ) {
        // TODO: implement
        cb( { } );
    } else {
        // Check if page exists
        //
        //console.warn( 'title: ' + JSON.stringify( title ) );
        var normalizedHref = title.makeLink(),
            obj = new TagTk( 'a',
                    [
                        new KV( 'href', normalizedHref ),
                        new KV( 'rel', 'mw:wikiLink' )
                    ],
                    token.dataAttribs
                ),
            content = token.attribs.slice(2);
        if ( href !== normalizedHref ) {
            obj.dataAttribs.sHref = href;
        }
        //console.warn('content: ' + JSON.stringify( content, null, 2 ) );
        // XXX: handle trail
        if ( content.length ) {
            var out = [];
            for ( var i = 0, l = content.length; i < l; i++ ) {
                out = out.concat( content[i].v );
                if ( i < l - 1 ) {
                    out.push( '|' );
                }
            }
            content = out;
        } else {
            content = [ env.decodeURI( env.tokensToString( href ) ) ];
            obj.dataAttribs.gc = 1;
        }

        var tail = token.attribs[1].v;
        if ( tail ) {
            obj.dataAttribs.tail = tail;
            content.push( tail );
        }
        cb( {
            tokens: [obj].concat( content, [ new EndTagTk( 'a' ) ] )
        } );
    }
};

WikiLinkHandler.prototype._simpleImageOptions = {
    // halign
    'left': 'halign',
    'right': 'halign',
    'center': 'halign',
    'float': 'halign',
    'none': 'halign',
    // valign
    'baseline': 'valign',
    'sub': 'valign',
    'super': 'valign',
    'top': 'valign',
    'text-top': 'valign',
    'middle': 'valign',
    'bottom': 'valign',
    'text-bottom': 'valign',
    // format
    'border': 'format',
    'frameless': 'format',
    'frame': 'format',
    'thumbnail': 'format',
    'thumb': 'format'
};

WikiLinkHandler.prototype._prefixImageOptions = {
    'link': 'link',
    'alt': 'alt',
    'page': 'page',
    'thumbnail': 'thumb',
    'thumb': 'thumb',
    'upright': 'aspect'
};

WikiLinkHandler.prototype.renderFile = function ( token, frame, cb, title ) {
    var env = this.manager.env;
    // distinguish media types
    // if image: parse options
    // Slice off the target and tail
    var content = token.attribs.slice(2);
    var MD5 = new jshashes.MD5(),
        hash = MD5.hex( title.key ),
        // TODO: Hackhack.. Move to proper test harness setup!
        path = [ this.manager.env.wgUploadPath, hash[0],
                hash.substr(0, 2), title.key ].join('/');

    // extract options
    var options = [],
        oHash = {},
        caption = [];
    for ( var i = 0, l = content.length; i < l; i++ ) {
        var oContent = content[i],
            oText = this.manager.env.tokensToString( oContent.v, true );
        //console.log( JSON.stringify( oText, null, 2 ) );
        if ( oText.constructor === String ) {
            oText = oText.trim();
            if ( this._simpleImageOptions[ oText.toLowerCase() ] ) {
                options.push( new KV( this._simpleImageOptions[ oText.toLowerCase() ],
                            oText ) );
                // use the lower-cased key for the hash lookup as well, so that
                // mixed-case options such as 'Thumb' end up under the right key
                oHash[ this._simpleImageOptions[ oText.toLowerCase() ] ] = oText;
                continue;
            } else {
                var maybeSize = oText.match( /^(\d*)(?:x(\d+))?px$/ );
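                // Illustrative matches for the size syntax above (comments only):
                //   '250px'     -> width '250'
                //   '250x100px' -> width '250', height '100'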
                //console.log( maybeSize );
                if ( maybeSize !== null ) {
                    var x = maybeSize[1],
                        y = maybeSize[2];
                    if ( x !== undefined ) {
                        options.push( new KV( 'width', x ) );
                        oHash.width = x;
                    }
                    if ( y !== undefined ) {
                        options.push( new KV( 'height', y ) );
                        oHash.height = y;
                    }
                } else {
                    var bits = oText.split( '=', 2 ),
                        key = this._prefixImageOptions[ bits[0].trim().toLowerCase() ];
                    if ( bits[0] && key ) {
                        oHash[key] = bits[1];
                        options.push( new KV( key, bits[1] ) );
                        //console.warn('handle prefix ' + bits );
                    } else {
                        // neither simple nor prefix option, add original
                        // tokens to caption.
                        caption = caption.concat( oContent.v );
                    }
                }
            }
        } else {
            caption = caption.concat( oContent.v );
        }
    }

    //var contentPos = token.dataAttribs.contentPos;
    //var optionSource = token.source.substr( contentPos[0], contentPos[1] - contentPos[0] );
    //console.log( 'optionSource: ' + optionSource );
    // XXX: The trouble with re-parsing is the need to re-expand templates.
    // Figure out how often non-image links contain image-like parameters!
    //var options = this.imageParser.processImageOptions( optionSource );
    //console.log( JSON.stringify( options, null, 2 ) );

    // XXX: check if the file exists, generate thumbnail, get size
    // XXX: render according to mode (inline, thumb, framed etc)
    if ( oHash.format && ( oHash.format === 'thumb' || oHash.format === 'thumbnail' ) ) {
        return this.renderThumb( token, this.manager, cb, title, path, caption, oHash, options );
    } else {
        // TODO: get /wiki from config!
        var a = new TagTk( 'a', [ new KV( 'href', title.makeLink() ) ] );
        a.dataAttribs = token.dataAttribs;

        var width, height;
        if ( ! oHash.height && ! oHash.width ) {
            width = '200px';
        } else {
            width = oHash.width;
            height = oHash.height;
        }

        var img = new SelfclosingTagTk( 'img',
                [
                    // FIXME!
                    new KV( 'height', height || '' ),
                    new KV( 'width', width || '' ),
                    new KV( 'src', path ),
                    new KV( 'alt', oHash.alt || title.key )
                ] );

        return { tokens: [ a, img, new EndTagTk( 'a' ) ] };
    }
};
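
/*
 * Rough shape of the inline (non-thumb) output produced above (illustrative
 * only; the actual href and src values depend on the environment's title
 * handling and wgUploadPath):
 *
 *   <a href="...file page link...">
 *     <img height="..." width="..." src="...upload path..." alt="...">
 *   </a>
 */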

WikiLinkHandler.prototype.renderThumb = function ( token, manager, cb, title, path, caption, oHash, options ) {
    // TODO: get /wiki from config!
    var a = new TagTk( 'a', [ new KV( 'href', title.makeLink() ) ] );
    a.dataAttribs = token.dataAttribs;
    a.dataAttribs.optionHash = oHash;
    a.dataAttribs.optionList = options;

    var width = 165;
    // Handle upright
    if ( 'aspect' in oHash ) {
        if ( oHash.aspect > 0 ) {
            width = width * oHash.aspect;
        } else {
            width *= 0.75;
        }
    }

    var figurestyle = "width: " + (width + 5) + "px;",
        figureclass = "thumb tright thumbinner";

    // set horizontal alignment
    if ( oHash.halign ) {
        if ( oHash.halign === 'left' ) {
            figurestyle += ' float: left;';
            figureclass = "thumb tleft thumbinner";
        } else if ( oHash.halign === 'center' ) {
            figureclass = "thumb center thumbinner";
        } else if ( oHash.halign === 'none' ) {
            figureclass = "thumb thumbinner";
        } else {
            figurestyle += ' float: right;';
        }
    } else {
        figurestyle += ' float: right;';
    }

    // XXX: set vertical alignment (valign)
    // XXX: support other formats (border, frameless, frame)
    // XXX: support prefixes
    var thumb = [
        new TagTk(
            'figure',
            [
                new KV( 'class', figureclass ),
                new KV( 'style', figurestyle ),
                new KV( 'typeof', 'http://mediawiki.org/rdf/Thumb' ),
                // XXX: define this globally?
                new KV( 'prefix', "mw: http://mediawiki.org/rdf/terms/" )
            ]
        ),
        new TagTk(
            'a',
            [
                new KV( 'href', title.makeLink() ),
                new KV( 'class', 'image' )
            ]
        ),
        new SelfclosingTagTk(
            'img',
            [
                new KV( 'src', path ),
                new KV( 'width', width + 'px' ),
                //new KV( 'height', '160px' ),
                new KV( 'class', 'thumbimage' ),
                new KV( 'alt', oHash.alt || title.key ),
                // Add resource as CURIE- needs global default prefix
                // definition.
                new KV( 'resource', '[:' + title.getPrefixedText() + ']' )
            ]
        ),
        new EndTagTk( 'a' ),
        new SelfclosingTagTk(
            'a',
            [
                new KV( 'href', title.makeLink() ),
                new KV( 'class', 'internal sprite details magnify' ),
                new KV( 'title', 'View photo details' )
            ]
        ),
        new TagTk( 'figcaption',
            [
                new KV( 'class', 'thumbcaption' ),
                new KV( 'property', 'mw:thumbcaption' )
            ] )
    ].concat(
        caption,
        [
            new EndTagTk( 'figcaption' ),
            new EndTagTk( 'figure' )
        ]
    );

    // set round-trip information on the wrapping figure token
    thumb[0].dataAttribs = token.dataAttribs;

    /*
     * Wikia DOM:
     * <figure class="thumb tright thumbinner" style="width:270px;">
     *   <a href="Delorean.jpg" class="image" data-image-name="DeLorean.jpg" id="DeLorean-jpg">
     *     <img alt="" src="Delorean.jpg" width="268" height="123" class="thumbimage">
     *   </a>
     *   <a href="File:DeLorean.jpg" class="internal sprite details magnify" title="View photo details"></a>
     *   <figcaption class="thumbcaption">
     *     A DeLorean DMC-12 from the front with the gull-wing doors open
     *     <table><tr><td>test</td></tr></table>
     *     Continuation of the caption
     *   </figcaption>
     *   <div class="picture-attribution">
     *     <img src="Christian-Avatar.png" width="16" height="16" class="avatar" alt="Christian">Added by <a href="User:Christian">Christian</a>
     *   </div>
     * </figure>
     */
    //console.warn( 'thumbtokens: ' + JSON.stringify( thumb, null, 2 ) );
    return { tokens: thumb };
};

function ExternalLinkHandler( manager, isInclude ) {
    this.manager = manager;
    this.manager.addTransform( this.onUrlLink.bind( this ), this.rank, 'tag', 'urllink' );
    this.manager.addTransform( this.onExtLink.bind( this ),
            this.rank - 0.001, 'tag', 'extlink' );
    this.manager.addTransform( this.onEnd.bind( this ),
            this.rank, 'end' );
    // create a new peg parser for image options..
    if ( !this.imageParser ) {
        // Actually the regular tokenizer, but we'll call it with the
        // img_options production only.
        ExternalLinkHandler.prototype.imageParser = new PegTokenizer();
    }
    this._reset();
}

ExternalLinkHandler.prototype._reset = function () {
    this.linkCount = 1;
};
ExternalLinkHandler.prototype.rank = 1.15;

ExternalLinkHandler.prototype._imageExtensions = {
    'jpg': true,
    'png': true,
    'gif': true,
    'svg': true
};

ExternalLinkHandler.prototype._isImageLink = function ( href ) {
    var bits = href.split( '.' );
    return bits.length > 1 &&
        this._imageExtensions[ bits[bits.length - 1] ] &&
        href.match( /^https?:\/\// );
};
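
// Illustrative behaviour of _isImageLink (comments only):
//   'http://example.com/a.png'  -> treated as an image link
//   'https://example.com/a.PNG' -> not matched (extension check is case-sensitive)
//   'example.com/a.png'         -> not matched (no http(s) scheme)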

ExternalLinkHandler.prototype.onUrlLink = function ( token, frame, cb ) {
    var env = this.manager.env,
        href = env.sanitizeURI(
            env.tokensToString( env.lookupKV( token.attribs, 'href' ).v )
        );
    if ( this._isImageLink( href ) ) {
        cb( { tokens: [ new SelfclosingTagTk( 'img',
                    [
                        new KV( 'src', href ),
                        new KV( 'alt', href.split('/').last() ),
                        new KV( 'rel', 'mw:externalImage' )
                    ],
                    { stx: 'urllink' }
                )
            ]
        } );
    } else {
        cb( {
            tokens: [
                new TagTk( 'a',
                    [
                        new KV( 'href', href ),
                        new KV( 'rel', 'mw:extLink' )
                    ],
                    { stx: 'urllink' } ),
                href,
                new EndTagTk( 'a' )
            ]
        } );
    }
};

// Bracketed external link
ExternalLinkHandler.prototype.onExtLink = function ( token, manager, cb ) {
    var env = this.manager.env,
        href = env.tokensToString( env.lookupKV( token.attribs, 'href' ).v ),
        content = env.lookupKV( token.attribs, 'content' ).v;
    href = env.sanitizeURI( href );
    //console.warn('extlink href: ' + href );
    //console.warn( 'content: ' + JSON.stringify( content, null, 2 ) );
    // validate the href
    if ( ! content.length ) {
        content = [ '[' + this.linkCount + ']' ];
        this.linkCount++;
    }
    if ( this.imageParser.tokenizeURL( href ) ) {
        if ( content.length === 1 &&
                content[0].constructor === String &&
                this.imageParser.tokenizeURL( content[0] ) &&
                this._isImageLink( content[0] ) )
        {
            var src = content[0];
            content = [ new SelfclosingTagTk( 'img',
                    [
                        new KV( 'src', src ),
                        new KV( 'alt', src.split('/').last() )
                    ],
                    { type: 'extlink' } )
                ];
        }
        cb( {
            tokens: [
                new TagTk( 'a',
                    [
                        new KV( 'href', href ),
                        new KV( 'rel', 'mw:extLink' )
                    ],
                    token.dataAttribs
                )
            ].concat( content, [ new EndTagTk( 'a' ) ] )
        } );
    } else {
        cb( {
            tokens: [ '[', href, ' ' ].concat( content, [ ']' ] )
        } );
    }
};

ExternalLinkHandler.prototype.onEnd = function ( token, manager, cb ) {
    this._reset();
    cb( { tokens: [ token ] } );
};

if ( typeof module === 'object' ) {
    module.exports.WikiLinkHandler = WikiLinkHandler;
    module.exports.ExternalLinkHandler = ExternalLinkHandler;
}
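
/*
 * Example of pulling these handlers into another module (illustrative only;
 * the manager object below is assumed to be an async token transform manager
 * from this code base):
 *
 *   var LinkHandler = require('./ext.core.LinkHandler.js'),
 *       wikiLinks = new LinkHandler.WikiLinkHandler( manager ),
 *       extLinks = new LinkHandler.ExternalLinkHandler( manager );
 */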