mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-15 18:39:52 +00:00
A bit more documentation and naming cleanup in the tokenizer wrapper.
This commit is contained in:
parent
459c4fa271
commit
b1e131d568
Notes:
Gabriel Wicke
2012-03-08 09:00:45 +00:00
|
@ -249,10 +249,10 @@ ExternalLinkHandler.prototype.onExtLink = function ( token, manager, cb ) {
|
||||||
//console.warn('extlink href: ' + href );
|
//console.warn('extlink href: ' + href );
|
||||||
//console.warn( 'content: ' + JSON.stringify( content, null, 2 ) );
|
//console.warn( 'content: ' + JSON.stringify( content, null, 2 ) );
|
||||||
// validate the href
|
// validate the href
|
||||||
if ( this.imageParser.parseURL( href ) ) {
|
if ( this.imageParser.tokenizeURL( href ) ) {
|
||||||
if ( content.length === 1 &&
|
if ( content.length === 1 &&
|
||||||
content[0].constructor === String &&
|
content[0].constructor === String &&
|
||||||
this.imageParser.parseURL( content[0] ) &&
|
this.imageParser.tokenizeURL( content[0] ) &&
|
||||||
this._isImageLink( content[0] ) )
|
this._isImageLink( content[0] ) )
|
||||||
{
|
{
|
||||||
var src = content[0];
|
var src = content[0];
|
||||||
|
|
|
@ -32,16 +32,31 @@ PegTokenizer.src = false;
|
||||||
*/
|
*/
|
||||||
PegTokenizer.prototype.process = function( text ) {
|
PegTokenizer.prototype.process = function( text ) {
|
||||||
var out, err;
|
var out, err;
|
||||||
if ( !this.parser ) {
|
if ( !this.tokenizer ) {
|
||||||
|
// Construct a singleton static tokenizer.
|
||||||
var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
|
var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
|
||||||
this.src = fs.readFileSync( pegSrcPath, 'utf8' );
|
this.src = fs.readFileSync( pegSrcPath, 'utf8' );
|
||||||
// Only create a single parser, as parse() is a static method.
|
var tokenizerSource = PEG.buildParser(this.src).toSource();
|
||||||
var parserSource = PEG.buildParser(this.src).toSource();
|
|
||||||
//console.warn( parserSource );
|
/* We patch the generated source to assign the arguments array for the
|
||||||
parserSource = parserSource.replace( 'parse: function(input, startRule) {',
|
* parse function to a function-scoped variable. We use this to pass
|
||||||
|
* in callbacks and other information, which can be used from actions
|
||||||
|
* run when matching a production. In particular, we pass in a
|
||||||
|
* callback called for a chunk of tokens in toplevelblock. Setting this
|
||||||
|
* callback per call to parse() keeps the tokenizer reentrant, so that it
|
||||||
|
* can be reused to expand templates while a main parse is ongoing.
|
||||||
|
* PEG tokenizer construction is very expensive, so having a single
|
||||||
|
* reentrant tokenizer is a big win.
|
||||||
|
*
|
||||||
|
* We could also make modules available to the tokenizer by prepending
|
||||||
|
* requires to the source.
|
||||||
|
*/
|
||||||
|
tokenizerSource = tokenizerSource.replace( 'parse: function(input, startRule) {',
|
||||||
'parse: function(input, startRule) { var __parseArgs = arguments;' );
|
'parse: function(input, startRule) { var __parseArgs = arguments;' );
|
||||||
//console.warn( parserSource );
|
//console.warn( tokenizerSource );
|
||||||
PegTokenizer.prototype.parser = eval( parserSource );
|
PegTokenizer.prototype.tokenizer = eval( tokenizerSource );
|
||||||
|
// alias the parse method
|
||||||
|
this.tokenizer.tokenize = this.tokenizer.parse;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Some input normalization: force a trailing newline
|
// Some input normalization: force a trailing newline
|
||||||
|
@ -52,7 +67,7 @@ PegTokenizer.prototype.process = function( text ) {
|
||||||
// XXX: Commented out exception handling during development to get
|
// XXX: Commented out exception handling during development to get
|
||||||
// reasonable traces.
|
// reasonable traces.
|
||||||
//try {
|
//try {
|
||||||
this.parser.parse(text, 'start',
|
this.tokenizer.tokenize(text, 'start',
|
||||||
// callback
|
// callback
|
||||||
this.emit.bind( this, 'chunk' ),
|
this.emit.bind( this, 'chunk' ),
|
||||||
// inline break test
|
// inline break test
|
||||||
|
@ -68,12 +83,15 @@ PegTokenizer.prototype.process = function( text ) {
|
||||||
};
|
};
|
||||||
|
|
||||||
PegTokenizer.prototype.processImageOptions = function( text ) {
|
PegTokenizer.prototype.processImageOptions = function( text ) {
|
||||||
return this.parser.parse(text, 'img_options', null, this );
|
return this.tokenizer.tokenize(text, 'img_options', null, this );
|
||||||
};
|
};
|
||||||
|
|
||||||
PegTokenizer.prototype.parseURL = function( text ) {
|
/**
|
||||||
|
* Tokenize a URL
|
||||||
|
*/
|
||||||
|
PegTokenizer.prototype.tokenizeURL = function( text ) {
|
||||||
try {
|
try {
|
||||||
return this.parser.parse(text, 'url', null, this );
|
return this.tokenizer.tokenize(text, 'url', null, this );
|
||||||
} catch ( e ) {
|
} catch ( e ) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue