A bit more documentation and naming cleanup in the tokenizer wrapper.

Author: Gabriel Wicke
Date:   2012-03-08 09:00:45 +00:00
Parent: 459c4fa271
Commit: b1e131d568

2 changed files with 31 additions and 13 deletions

@@ -249,10 +249,10 @@ ExternalLinkHandler.prototype.onExtLink = function ( token, manager, cb ) {
 	//console.warn('extlink href: ' + href );
 	//console.warn( 'content: ' + JSON.stringify( content, null, 2 ) );
 	// validate the href
-	if ( this.imageParser.parseURL( href ) ) {
+	if ( this.imageParser.tokenizeURL( href ) ) {
 		if ( content.length === 1 &&
 				content[0].constructor === String &&
-				this.imageParser.parseURL( content[0] ) &&
+				this.imageParser.tokenizeURL( content[0] ) &&
 				this._isImageLink( content[0] ) )
 		{
 			var src = content[0];

@@ -32,16 +32,31 @@ PegTokenizer.src = false;
  */
 PegTokenizer.prototype.process = function( text ) {
 	var out, err;
-	if ( !this.parser ) {
+	if ( !this.tokenizer ) {
+		// Construct a singleton static tokenizer.
 		var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
 		this.src = fs.readFileSync( pegSrcPath, 'utf8' );
-		// Only create a single parser, as parse() is a static method.
-		var parserSource = PEG.buildParser(this.src).toSource();
-		//console.warn( parserSource );
-		parserSource = parserSource.replace( 'parse: function(input, startRule) {',
+		var tokenizerSource = PEG.buildParser(this.src).toSource();
+
+		/* We patch the generated source to assign the arguments array for the
+		 * parse function to a function-scoped variable. We use this to pass
+		 * in callbacks and other information, which can be used from actions
+		 * run when matching a production. In particular, we pass in a
+		 * callback called for a chunk of tokens in toplevelblock. Setting this
+		 * callback per call to parse() keeps the tokenizer reentrant, so that it
+		 * can be reused to expand templates while a main parse is ongoing.
+		 * PEG tokenizer construction is very expensive, so having a single
+		 * reentrant tokenizer is a big win.
+		 *
+		 * We could also make modules available to the tokenizer by prepending
+		 * requires to the source.
+		 */
+		tokenizerSource = tokenizerSource.replace( 'parse: function(input, startRule) {',
 				'parse: function(input, startRule) { var __parseArgs = arguments;' );
-		//console.warn( parserSource );
-		PegTokenizer.prototype.parser = eval( parserSource );
+		//console.warn( tokenizerSource );
+		PegTokenizer.prototype.tokenizer = eval( tokenizerSource );
+		// alias the parse method
+		this.tokenizer.tokenize = this.tokenizer.parse;
 	}
 
 	// Some input normalization: force a trailing newline
@@ -52,7 +67,7 @@ PegTokenizer.prototype.process = function( text ) {
 	// XXX: Commented out exception handling during development to get
 	// reasonable traces.
 	//try {
-		this.parser.parse(text, 'start',
+		this.tokenizer.tokenize(text, 'start',
 				// callback
 				this.emit.bind( this, 'chunk' ),
 				// inline break test
@@ -68,12 +83,15 @@ PegTokenizer.prototype.process = function( text ) {
 };
 
 PegTokenizer.prototype.processImageOptions = function( text ) {
-	return this.parser.parse(text, 'img_options', null, this );
+	return this.tokenizer.tokenize(text, 'img_options', null, this );
 };
 
-PegTokenizer.prototype.parseURL = function( text ) {
+/**
+ * Tokenize a URL
+ */
+PegTokenizer.prototype.tokenizeURL = function( text ) {
 	try {
-		return this.parser.parse(text, 'url', null, this );
+		return this.tokenizer.tokenize(text, 'url', null, this );
 	} catch ( e ) {
 		return false;
 	}
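For context on the __parseArgs patch: PEG.js generates parse() with the signature parse(input, startRule), so any extra positional arguments passed to the aliased tokenize() are only reachable through the captured arguments object. In the process() call shown above, [0] is the input text, [1] the start rule, and [2] the chunk callback. A minimal sketch of how a grammar action might consume it (illustrative only; the actual pegTokenizer.pegjs.txt grammar is not part of this diff, and the variable names below are hypothetical):

	// Hypothetical action body inside pegTokenizer.pegjs.txt (illustration only).
	// With the patched parse(), __parseArgs holds the arguments of the current
	// tokenize() call: [0] input, [1] start rule, [2] chunk callback, ...
	{
		var chunkCB = __parseArgs[2];
		if ( chunkCB ) {
			// Stream this block's tokens out immediately instead of
			// accumulating the whole document before returning.
			// 'tokens' stands for whatever this production matched.
			chunkCB( tokens );
		}
		return [];
	}

Because the callback is looked up per parse() call rather than stored on the tokenizer object, a nested tokenize() run for template expansion sees its own callback, which is what keeps the shared singleton reentrant.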