Improve the handling of newlines for round-tripping

This is an improvement, but some extra newlines are still inserted after
paragraphs. Example input:

-------

Foo:
{|
|foo
|}
-------

Extra newlines are inserted after the "Foo:" line and after the "foo" cell in
the table. They are not fed as tokens or text to the tree builder, so there is
likely a bug in the html5 library or JSDom.

Change-Id: I83eb6180e3cd1c4e7f9b15b31d339e1d32bccd3f
This commit is contained in:
Gabriel Wicke 2012-06-05 23:44:23 +02:00
parent 59fc634cce
commit a146fcb8ad
3 changed files with 16 additions and 14 deletions

View file

@ -34,9 +34,10 @@ PostExpandParagraphHandler.prototype.register = function ( dispatcher ) {
PostExpandParagraphHandler.prototype.reset = function ( token, frame, cb ) {
//console.warn( 'PostExpandParagraphHandler.reset ' + JSON.stringify( this.tokens ) );
if ( this.newLines ) {
return { tokens: this._finish() };
this.tokens.push( token );
return this._finish();
} else {
return { token: token };
return [token];
}
};
@ -66,19 +67,17 @@ PostExpandParagraphHandler.prototype.onNewLine = function ( token, frame, cb )
};
PostExpandParagraphHandler.prototype.onEnd = function ( token, frame, cb ) {
var tokens = this.tokens;
this.reset();
return { tokens: tokens.concat( [token] ) };
return { tokens: this.reset( token ) };
}
PostExpandParagraphHandler.prototype.onAny = function ( token, frame, cb ) {
//console.warn( 'PostExpandParagraphHandler.onAny' );
this.tokens.push( token );
if ( token.constructor === CommentTk ||
( token.constructor === String && token.match( /^[\t ]*$/ ) )
)
{
// Continue with collection..
this.tokens.push( token );
return {};
} else {
// XXX: Only open paragraph if inline token follows!
@ -86,14 +85,16 @@ PostExpandParagraphHandler.prototype.onAny = function ( token, frame, cb ) {
// None of the tokens we are interested in, so abort processing..
//console.warn( 'PostExpandParagraphHandler.onAny: ' + JSON.stringify( this.tokens, null , 2 ) );
if ( this.newLines >= 2 && ! u.isBlockToken( token ) ) {
this.tokens.push( token );
var nlTks = [];
while ( this.tokens[0].constructor === NlTk ) {
nlTks.push( this.tokens.shift() );
}
//console.warn( 'insert p:' + JSON.stringify( token, null, 2 ) );
return { tokens: nlTks.concat([ new TagTk( 'p' ) ], this._finish() ) };
var res = { tokens: nlTks.concat([ new TagTk( 'p' ) ], this._finish() ) };
//console.warn( 'insert p:' + JSON.stringify( res, null, 2 ) );
return res;
} else {
return { tokens: this._finish() };
return { tokens: this.reset(token) };
}
}

View file

@ -72,6 +72,8 @@ FauxHTML5.TreeBuilder.prototype._att = function (maybeAttribs) {
// Adapt the token format to internal HTML tree builder format, call the actual
// html tree builder by emitting the token.
FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
//console.warn( 'processToken: ' + JSON.stringify( token ));
var attribs = token.attribs || [];
if ( token.dataAttribs ) {
var dataMW = JSON.stringify( token.dataAttribs );
@ -132,7 +134,6 @@ FauxHTML5.TreeBuilder.prototype.processToken = function (token) {
default:
console.warn("Unhandled token: " + JSON.stringify(token));
break;
break;
}
};

View file

@ -494,7 +494,7 @@ inline
inlineline
= c:(urltext / !inline_breaks (inline_element / [^\n]))+ {
= c:(urltext / !inline_breaks (inline_element / [^\r\n]))+ {
return flatten_stringlist( c );
}
@ -723,7 +723,7 @@ urlencoded_char = "%" c0:[0-9a-fA-F] c1:[0-9a-fA-F] {
//[^][<>"\\x00-\\x20\\x7F\p{Zs}]
// no punctuation, and '{<' to trigger directives
no_punctuation_char = [^ :\]\[\n"'<>\x00-\x20\x7f,.&%\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000{]
no_punctuation_char = [^ :\]\[\r\n"'<>\x00-\x20\x7f,.&%\u00A0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000{]
url
= proto:url_protocol
@ -1993,7 +1993,7 @@ attribute_preprocessor_text_single_line
= r:( t:[^{}&']+ { return t.join(''); }
/ !inline_breaks (
directive
/ !'\n' [{&] )
/ ![\r\n] [{&] )
)* {
return flatten_string ( r );
}
@ -2001,7 +2001,7 @@ attribute_preprocessor_text_double_line
= r:( t:[^{}&"]+ { return t.join(''); }
/ !inline_breaks (
directive
/ !'\n' [{&] )
/ ![\r\n] [{&] )
)* {
//console.warn( 'double:' + pp(r) );
return flatten_string ( r );