2011-12-08 10:59:44 +00:00
|
|
|
/* Front-end/wrapper for a particular tree builder, in this case the
 * parser/tree builder from the node 'html5' module. Feed it tokens using
 * processToken, and it will build you a DOM tree retrievable using .document
 * (with the body element at .document.body) once an EOFTk has been
 * processed. */
|
2011-11-18 14:00:14 +00:00
|
|
|
|
2012-01-04 08:39:45 +00:00
|
|
|
var events = require('events'),
|
|
|
|
HTML5 = require('./html5/index');
|
2011-11-18 14:00:14 +00:00
|
|
|
|
2011-11-29 15:11:51 +00:00
|
|
|
// Namespace object that carries the TreeBuilder; it is what this file exports
// as module.exports.FauxHTML5.
// NOTE(review): there is no `var` here and the preceding `var` statement is
// terminated by a semicolon, so this is a plain assignment to an undeclared
// identifier. Confirm whether other code relies on that before declaring it.
FauxHTML5 = {};
|
2011-11-18 14:00:14 +00:00
|
|
|
|
|
|
|
|
2011-12-08 10:37:18 +00:00
|
|
|
/**
 * Wrapper around the HTML5 tree builder.
 *
 * Creates an HTML5.Parser, passes this object to parser.parse() and then
 * processes an initial 'body' start tag so that a document is implicitly
 * started.
 *
 * @constructor
 */
FauxHTML5.TreeBuilder = function ( ) {
	// Run the EventEmitter constructor first so that this instance gets its
	// own listener state instead of relying on anything inherited through the
	// prototype chain. This has to happen before the parser is handed `this`.
	events.EventEmitter.call( this );

	// The parser we are going to emit our tokens to
	this.parser = new HTML5.Parser();

	// Sets up the parser
	this.parser.parse( this );

	// implicitly start a new document
	this.processToken( new TagTk( 'body' ) );
};
|
2011-11-18 14:00:14 +00:00
|
|
|
|
2012-01-04 12:28:41 +00:00
|
|
|
// Inherit from EventEmitter. The prototype object is created with
// Object.create rather than `new events.EventEmitter()` so that no emitter
// instance state (such as a listener table) ends up on the shared prototype,
// where it would be visible to every TreeBuilder instance.
FauxHTML5.TreeBuilder.prototype = Object.create( events.EventEmitter.prototype );
FauxHTML5.TreeBuilder.prototype.constructor = FauxHTML5.TreeBuilder;
|
2011-11-18 14:00:14 +00:00
|
|
|
|
2012-01-04 11:00:54 +00:00
|
|
|
/**
 * Subscribe this tree builder to the (token) 'chunk' and 'end' events of a
 * token emitter, normally the TokenTransformDispatcher.
 *
 * @param {Object} emitter Object providing addListener( eventName, handler ).
 */
FauxHTML5.TreeBuilder.prototype.listenForTokensFrom = function ( emitter ) {
	// Handlers are bound so that they run with this tree builder as `this`.
	var chunkHandler = this.onChunk.bind( this );
	emitter.addListener( 'chunk', chunkHandler );

	var endHandler = this.onEnd.bind( this );
	emitter.addListener( 'end', endHandler );
};
|
|
|
|
|
|
|
|
/**
 * 'chunk' event handler: pass every token of the chunk to processToken, in
 * order.
 *
 * @param {Array} tokens Indexable list of tokens with a length property.
 */
FauxHTML5.TreeBuilder.prototype.onChunk = function ( tokens ) {
	// The length is read once up front, before any token is processed.
	var count = tokens.length,
		idx = 0;
	while ( idx < count ) {
		this.processToken( tokens[idx] );
		idx += 1;
	}
};
|
|
|
|
|
|
|
|
/**
 * 'end' event handler: fix up the body reference on the parser's document,
 * emit the document with a 'document' event, then reset the parser and
 * implicitly start the next document with a 'body' start tag.
 */
FauxHTML5.TreeBuilder.prototype.onEnd = function ( ) {
	var doc = this.parser.document;

	// FIXME HACK: For some reason the end token is not processed sometimes,
	// which normally fixes the body reference up. Set it explicitly here.
	var bodyElements = doc.getElementsByTagName( 'body' );
	doc.body = bodyElements[0];

	// Hand the document to consumers.
	this.emit( 'document', doc );

	// XXX: more clean up to allow reuse.
	this.parser.setup();
	this.processToken( new TagTk( 'body' ) );
};
|
|
|
|
|
2012-01-27 13:32:44 +00:00
|
|
|
/**
 * Convert a token attribute list into the attribute objects passed along
 * with tag tokens to the tree builder.
 *
 * @param {Array} [maybeAttribs] Array of attribute objects with `k` (name)
 *   and `v` (value) properties. Any non-array value, including undefined
 *   and null, is treated as "no attributes".
 * @returns {Array} New array of { nodeName, nodeValue } objects, one per
 *   input attribute and in the same order; empty if there are none.
 */
FauxHTML5.TreeBuilder.prototype._att = function (maybeAttribs) {
	// Array.isArray is built in, so this check does not depend on a `$`
	// global being defined by some other file.
	if ( ! Array.isArray( maybeAttribs ) ) {
		return [];
	}
	return maybeAttribs.map( function ( att ) {
		return { nodeName: att.k, nodeValue: att.v };
	} );
};
|
2012-01-03 18:44:31 +00:00
|
|
|
|
2011-12-08 10:37:18 +00:00
|
|
|
/**
 * Adapt the token format to the internal HTML tree builder format and call
 * the actual html tree builder by emitting the token on this object.
 *
 * Dispatch is on token.constructor:
 *  - String:           'Characters' token carrying the string
 *  - NlTk:             ignored
 *  - TagTk:            'StartTag'
 *  - SelfclosingTagTk: 'StartTag', followed by an 'EndTag' unless the name
 *                      is listed in HTML5.VOID_ELEMENTS
 *  - EndTagTk:         'EndTag'
 *  - CommentTk:        'Comment' carrying token.value
 *  - EOFTk:            emits 'end' and an 'EOF' token, then stores the
 *                      parser's document in this.document
 *  - anything else:    logged with console.warn
 *
 * If the token has dataAttribs, they are JSON-serialized into an extra
 * 'data-mw' attribute on the emitted tag. The token itself is not modified,
 * so processing the same token object again does not accumulate duplicate
 * 'data-mw' attributes.
 *
 * @param {Object|string} token Token to process.
 */
FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
	var attribs = token.attribs;
	if ( token.dataAttribs ) {
		// Work on a copy so that the caller's attribute list stays untouched.
		attribs = attribs ? attribs.slice() : [];
		attribs.push( {
			// Mediawiki-specific round-trip / non-semantic information
			k: 'data-mw',
			v: JSON.stringify( token.dataAttribs )
		} );
	}

	switch( token.constructor ) {
		case String:
			this.emit('token', {type: 'Characters', data: token});
			break;
		case NlTk:
			// Newline tokens are not passed on to the tree builder.
			break;
		case TagTk:
			this.emit('token', {type: 'StartTag',
				name: token.name,
				data: this._att(attribs)});
			break;
		case SelfclosingTagTk:
			this.emit('token', {type: 'StartTag',
				name: token.name,
				data: this._att(attribs)});
			if ( HTML5.VOID_ELEMENTS.indexOf( token.name.toLowerCase() ) < 0 ) {
				// VOID_ELEMENTS are automagically treated as self-closing by
				// the tree builder
				this.emit('token', {type: 'EndTag',
					name: token.name,
					data: this._att(attribs)});
			}
			break;
		case EndTagTk:
			this.emit('token', {type: 'EndTag',
				name: token.name,
				data: this._att(attribs)});
			break;
		case CommentTk:
			this.emit('token', {type: 'Comment',
				data: token.value});
			break;
		case EOFTk:
			this.emit('end');
			this.emit('token', { type: 'EOF' } );
			this.document = this.parser.document;
			if ( ! this.document.body ) {
				// HACK: This should not be needed really.
				this.document.body = this.parser.document.getElementsByTagName('body')[0];
			}
			// The 'document' event is not emitted from here.
			break;
		default:
			console.warn("Unhandled token: " + JSON.stringify(token));
			break;
	}
};
|
|
|
|
|
2011-12-08 10:37:18 +00:00
|
|
|
|
|
|
|
|
2011-11-29 15:11:51 +00:00
|
|
|
// Export the FauxHTML5 namespace when a `module` object is available.
if ( typeof module === "object" ) {
	module.exports.FauxHTML5 = FauxHTML5;
}
|