Serializer and table round-tripping improvements

* added stx: 'html' round-trip information for html tags
* added t_stx: 'row' round-trip information for row-wise table wiki syntax
  (cells separated by `||` or `!!` on one line), and support for it in the
  serializer
* the first table row is implicit in wikitext, so the serializer no longer
  emits `|-` for a row that directly follows the tbody start
* renamed lastToken to prevToken in serializer
* strip leading newlines from the first chunk, using an initial chunkCB

Change-Id: I014b046539d1b674d830551c5fd1b74a67f81993
This commit is contained in:
Gabriel Wicke 2012-05-21 14:46:07 +02:00
parent e069e7cb1c
commit a6610e52c2
2 changed files with 68 additions and 31 deletions

View file

@ -35,8 +35,8 @@ WSP._listHandler = function( bullet, state, token ) {
curList.itemCount > 2 || curList.itemCount > 2 ||
// A nested list, not directly after the li // A nested list, not directly after the li
( curList.itemCount > 1 && ( curList.itemCount > 1 &&
! ( state.lastToken.constructor === TagTk && ! ( state.prevToken.constructor === TagTk &&
state.lastToken.name === 'li') )) { state.prevToken.name === 'li') )) {
res = bullets; res = bullets;
} else { } else {
res = bullet; res = bullet;
@ -69,10 +69,10 @@ WSP._listItemHandler = function ( state, token ) {
WSP._serializeTableTag = function ( symbol, optionEndSymbol, state, token ) { WSP._serializeTableTag = function ( symbol, optionEndSymbol, state, token ) {
if ( token.attribs.length ) { if ( token.attribs.length ) {
return '\n' + symbol + ' ' + return symbol + ' ' +
WSP._serializeAttributes( token.attribs ) + optionEndSymbol; WSP._serializeAttributes( token.attribs ) + optionEndSymbol;
} else { } else {
return '\n' + symbol; return symbol;
} }
}; };
@ -114,15 +114,39 @@ WSP.tagToWikitext = {
dd: { start: id(":") }, dd: { start: id(":") },
// XXX: handle options // XXX: handle options
table: { table: {
start: WSP._serializeTableTag.bind(null, "{|", ''), start: WSP._serializeTableTag.bind(null, "\n{|", ''),
end: id("\n|}") end: id("\n|}")
}, },
tbody: {}, tbody: {},
th: { start: WSP._serializeTableTag.bind(null, "!", ' |') }, th: {
start: function ( state, token ) {
if ( token.dataAttribs.t_stx === 'row' ) {
return WSP._serializeTableTag("!!", ' |', state, token);
} else {
return WSP._serializeTableTag("\n!", ' |', state, token);
}
}
},
// XXX: omit for first row in table. // XXX: omit for first row in table.
tr: { start: WSP._serializeTableTag.bind(null, "|-", ' |') }, tr: {
td: { start: WSP._serializeTableTag.bind(null, "|", ' |') }, start: function ( state, token ) {
caption: { start: WSP._serializeTableTag.bind(null, "|+", ' |') }, if ( state.prevToken.constructor === TagTk && state.prevToken.name === 'tbody' ) {
return '';
} else {
return WSP._serializeTableTag("\n|-", ' |', state, token );
}
}
},
td: {
start: function ( state, token ) {
if ( token.dataAttribs.t_stx === 'row' ) {
return WSP._serializeTableTag("||", ' |', state, token);
} else {
return WSP._serializeTableTag("\n|", ' |', state, token);
}
}
},
caption: { start: WSP._serializeTableTag.bind(null, "\n|+", ' |') },
p: { p: {
start: function( state, token ) { start: function( state, token ) {
if (state.needParagraphLines) { if (state.needParagraphLines) {
@ -164,7 +188,15 @@ WSP._serializeAttributes = function ( attribs ) {
// XXX: round-trip optional whitespace / line breaks etc // XXX: round-trip optional whitespace / line breaks etc
return out.join(' '); return out.join(' ');
}; };
/**
 * Initial chunk callback: strip the newlines at the very start of the
 * serialized output, then hand over to the real chunk callback.
 *
 * While this function is installed as state.chunkCB, chunks that are empty
 * or consist only of newlines are swallowed. The first chunk with other
 * content is forwarded to state.realChunkCB with its leading newlines
 * removed, and state.chunkCB is then replaced by state.realChunkCB so that
 * all later chunks pass through unmodified.
 *
 * @param {Object} state Serializer state; reads state.realChunkCB and
 *   overwrites state.chunkCB.
 * @param {string} chunk The next piece of serialized output.
 */
WSP._eatFirstNewLine = function ( state, chunk ) {
	// Remove every leading newline in a single pass.
	var stripped = chunk.replace( /^\n+/, '' );
	if ( stripped === '' ) {
		// Nothing has been emitted yet, so stay in stripping mode. Switching
		// callbacks here would let a leading newline in the next chunk
		// (for example "\n{|") leak into the start of the output.
		return;
	}
	state.realChunkCB( stripped );
	state.chunkCB = state.realChunkCB;
};
/** /**
* Serialize a chunk of tokens * Serialize a chunk of tokens
@ -172,16 +204,18 @@ WSP._serializeAttributes = function ( attribs ) {
WSP.serializeTokens = function( tokens, chunkCB ) { WSP.serializeTokens = function( tokens, chunkCB ) {
var state = $.extend({}, this.defaultOptions, this.options), var state = $.extend({}, this.defaultOptions, this.options),
i, l; i, l;
state.chunkCB = WSP._eatFirstNewLine.bind( this, state );
if ( chunkCB === undefined ) { if ( chunkCB === undefined ) {
var out = []; var out = [];
chunkCB = out.push.bind(out); state.realChunkCB = out.push.bind(out);
for ( i = 0, l = tokens.length; i < l; i++ ) { for ( i = 0, l = tokens.length; i < l; i++ ) {
this._serializeToken( state, chunkCB, tokens[i] ); this._serializeToken( state, tokens[i] );
} }
return out; return out;
} else { } else {
state.realChunkCB = chunkCB;
for ( i = 0, l = tokens.length; i < l; i++ ) { for ( i = 0, l = tokens.length; i < l; i++ ) {
this._serializeToken( state, chunkCB, tokens[i] ); this._serializeToken( state, tokens[i] );
} }
} }
}; };
@ -190,8 +224,8 @@ WSP.serializeTokens = function( tokens, chunkCB ) {
/** /**
* Serialize a token. * Serialize a token.
*/ */
WSP._serializeToken = function ( state, chunkCB, token ) { WSP._serializeToken = function ( state, token ) {
state.lastToken = state.curToken; state.prevToken = state.curToken;
state.curToken = token; state.curToken = token;
var handler; var handler;
switch( token.constructor ) { switch( token.constructor ) {
@ -199,23 +233,23 @@ WSP._serializeToken = function ( state, chunkCB, token ) {
case SelfclosingTagTk: case SelfclosingTagTk:
handler = this.tagToWikitext[token.name]; handler = this.tagToWikitext[token.name];
if ( handler && handler.start ) { if ( handler && handler.start ) {
chunkCB( handler.start( state, token ) ); state.chunkCB( handler.start( state, token ) );
} }
break; break;
case EndTagTk: case EndTagTk:
handler = this.tagToWikitext[token.name]; handler = this.tagToWikitext[token.name];
if ( handler && handler.end ) { if ( handler && handler.end ) {
chunkCB( handler.end( state, token ) ); state.chunkCB( handler.end( state, token ) );
} }
break; break;
case String: case String:
chunkCB( token ); state.chunkCB( token );
break; break;
case CommentTk: case CommentTk:
chunkCB( '<!--' + token.value + '-->' ); state.chunkCB( '<!--' + token.value + '-->' );
break; break;
case NlTk: case NlTk:
chunkCB( '\n' ); state.chunkCB( '\n' );
break; break;
case EOFTk: case EOFTk:
break; break;
@ -230,13 +264,15 @@ WSP._serializeToken = function ( state, chunkCB, token ) {
*/ */
WSP.serializeDOM = function( node, chunkCB ) { WSP.serializeDOM = function( node, chunkCB ) {
var state = $.extend({}, this.defaultOptions, this.options); var state = $.extend({}, this.defaultOptions, this.options);
state.chunkCB = this._eatFirstNewLine.bind( this, state );
if ( ! chunkCB ) { if ( ! chunkCB ) {
var out = []; var out = [];
chunkCB = out.push.bind( out ); state.realChunkCB = out.push.bind( out );
this._serializeDOM( node, chunkCB, state ); this._serializeDOM( node, state );
return out.join(''); return out.join('');
} else { } else {
this._serializeDOM( node, chunkCB, state ); state.realChunkCB = chunkCB;
this._serializeDOM( node, state );
} }
}; };
@ -244,7 +280,7 @@ WSP.serializeDOM = function( node, chunkCB ) {
* Internal worker. Recursively serialize a DOM subtree by creating tokens and * Internal worker. Recursively serialize a DOM subtree by creating tokens and
* calling _serializeToken on each of these. * calling _serializeToken on each of these.
*/ */
WSP._serializeDOM = function( node, chunkCB, state ) { WSP._serializeDOM = function( node, state ) {
// serialize this node // serialize this node
switch( node.nodeType ) { switch( node.nodeType ) {
case Node.ELEMENT_NODE: case Node.ELEMENT_NODE:
@ -256,23 +292,23 @@ WSP._serializeDOM = function( node, chunkCB, state ) {
var tkAttribs = this._getDOMAttribs(node.attributes), var tkAttribs = this._getDOMAttribs(node.attributes),
tkRTInfo = this._getDOMRTInfo(node.attributes); tkRTInfo = this._getDOMRTInfo(node.attributes);
this._serializeToken( state, chunkCB, this._serializeToken( state,
new TagTk( name, tkAttribs, tkRTInfo ) ); new TagTk( name, tkAttribs, tkRTInfo ) );
for ( var i = 0, l = children.length; i < l; i++ ) { for ( var i = 0, l = children.length; i < l; i++ ) {
// serialize all children // serialize all children
this._serializeDOM( children[i], chunkCB, state ); this._serializeDOM( children[i], state );
} }
this._serializeToken( state, chunkCB, this._serializeToken( state,
new EndTagTk( name, tkAttribs, tkRTInfo ) ); new EndTagTk( name, tkAttribs, tkRTInfo ) );
} else { } else {
console.warn( 'Unhandled element: ' + node.outerHTML ); console.warn( 'Unhandled element: ' + node.outerHTML );
} }
break; break;
case Node.TEXT_NODE: case Node.TEXT_NODE:
this._serializeToken( state, chunkCB, node.data ); this._serializeToken( state, node.data );
break; break;
case Node.COMMENT_NODE: case Node.COMMENT_NODE:
this._serializeToken( state, chunkCB, new CommentTk( node.data ) ); this._serializeToken( state, new CommentTk( node.data ) );
break; break;
default: default:
console.warn( "Unhandled node type: " + console.warn( "Unhandled node type: " +

View file

@ -1192,6 +1192,7 @@ generic_tag
res.dataAttribs = {}; res.dataAttribs = {};
} }
res.dataAttribs.sourceTagPos = [tagStartPos - 1, pos]; res.dataAttribs.sourceTagPos = [tagStartPos - 1, pos];
res.dataAttribs.stx = 'html';
return res; return res;
} }
@ -1430,7 +1431,7 @@ table_row_tag
table_data_tags table_data_tags
= pipe = pipe
td:table_data_tag td:table_data_tag
tds:( pipe_pipe tdt:table_data_tag { return tdt } )* { tds:( pipe_pipe tdt:table_data_tag { tdt[0].dataAttribs.t_stx = 'row'; return tdt } )* {
return td.concat(tds); return td.concat(tds);
} }
@ -1454,7 +1455,7 @@ table_heading_tags
= //& { console.warn( 'th enter @' + input.substr(pos, 10)); return true; } = //& { console.warn( 'th enter @' + input.substr(pos, 10)); return true; }
"!" "!"
th:table_heading_tag th:table_heading_tag
ths:( "!!" tht:table_heading_tag { return tht } )* { ths:( "!!" tht:table_heading_tag { tht[0].dataAttribs.t_stx = 'row'; return tht } )* {
//console.warn( 'thts: ' + pp(th.concat(ths))); //console.warn( 'thts: ' + pp(th.concat(ths)));
return th.concat(ths); return th.concat(ths);
} }