Fix whitespace preservation around meta items

This was broken, especially in wrappers.

Changed the wrapping algorithm so that meta items are placed outside
wrappers if possible. On the left-hand side, this is already the case:
we don't open wrappers for meta items. On the right-hand side, this is
accomplished by buffering the meta items and only inserting them when
we encounter either real text (not whitespace) or the end of the wrapper.
If we're interrupted by real text, we insert the meta items with the
unmodified whitespace. If we're interrupted by the end of the wrapper,
we insert the meta items outside of the wrapper with whitespace stripped.

Internally, this is done by stripping the whitespace into the whitespace[0]
of the meta item to its right. Then when we output the meta items, we
either decide to 'restore' the whitespace, or to 'fixup' by also setting
whitespace[3] on the element before the whitespace.

Change-Id: Ibeea2a9906c4aae9fe6d284613edd6ec853ca5e7
This commit is contained in:
Catrope 2013-04-18 16:06:58 -07:00
parent 151c325c0d
commit 83a592f312
2 changed files with 110 additions and 9 deletions

View file

@ -342,6 +342,26 @@ ve.dm.Converter.prototype.getDataFromDomRecursion = function ( domElement, wrapp
nextWhitespace = '';
}
}
function outputWrappedMetaItems( whitespaceTreatment ) {
var i, len, prev = wrappingParagraph;
for ( i = 0, len = wrappedMetaItems.length; i < len; i++ ) {
if ( wrappedMetaItems[i].type && wrappedMetaItems[i].type.charAt( 0 ) !== '/' ) {
if ( wrappedMetaItems[i].internal && wrappedMetaItems[i].internal.whitespace ) {
if ( whitespaceTreatment === 'restore' ) {
data = data.concat( ve.dm.Converter.getDataContentFromText(
wrappedMetaItems[i].internal.whitespace[0], context.annotations
) );
delete wrappedMetaItems[i].internal;
} else if ( whitespaceTreatment === 'fixup' ) {
addWhitespace( prev, 3, wrappedMetaItems[i].internal.whitespace[0] );
}
}
prev = wrappedMetaItems[i];
}
data.push( wrappedMetaItems[i] );
}
wrappedMetaItems = [];
}
function startWrapping() {
// Mark this paragraph as having been generated by
// us, so we can strip it on the way out
@ -363,6 +383,7 @@ ve.dm.Converter.prototype.getDataFromDomRecursion = function ( domElement, wrapp
nextWhitespace = wrappedWhitespace;
}
data.push( { 'type': '/paragraph' } );
outputWrappedMetaItems( 'fixup' );
wrappingParagraph = undefined;
context.inWrapper = false;
context.canCloseWrapper = false;
@ -400,6 +421,7 @@ ve.dm.Converter.prototype.getDataFromDomRecursion = function ( domElement, wrapp
nextWhitespace = '',
wrappedWhitespace = '',
wrappedWhitespaceIndex,
wrappedMetaItems = [],
context = {},
prevContext = this.contextStack.length ? this.contextStack[this.contextStack.length - 1] : null;
@ -472,9 +494,19 @@ ve.dm.Converter.prototype.getDataFromDomRecursion = function ( domElement, wrapp
if ( childDataElements.length === 1 ) {
childDataElements.push( { 'type': '/' + childDataElements[0].type } );
}
data = data.concat( childDataElements );
processNextWhitespace( childDataElements[0] );
prevElement = childDataElements[0];
if ( context.inWrapper ) {
wrappedMetaItems = wrappedMetaItems.concat( childDataElements );
if ( wrappedWhitespace !== '' ) {
data.splice( wrappedWhitespaceIndex, wrappedWhitespace.length );
addWhitespace( childDataElements[0], 0, wrappedWhitespace );
nextWhitespace = wrappedWhitespace;
wrappedWhitespace = '';
}
} else {
data = data.concat( childDataElements );
processNextWhitespace( childDataElements[0] );
prevElement = childDataElements[0];
}
break;
}
@ -565,6 +597,7 @@ ve.dm.Converter.prototype.getDataFromDomRecursion = function ( domElement, wrapp
}
nextWhitespace = text;
wrappedWhitespace = '';
outputWrappedMetaItems( 'restore' );
}
// We're done, no actual text left to process
break;
@ -596,6 +629,7 @@ ve.dm.Converter.prototype.getDataFromDomRecursion = function ( domElement, wrapp
addWhitespace( wrappingParagraph, 0, matches[1] );
}
} else {
outputWrappedMetaItems( 'restore' );
// We were already wrapping in a paragraph,
// so the leading whitespace must be output
data = data.concat(
@ -659,7 +693,7 @@ ve.dm.Converter.prototype.getDataFromDomRecursion = function ( domElement, wrapp
);
break;
case Node.COMMENT_NODE:
// TODO treat this as a node with nodeName #comment
// TODO treat this as a node with nodeName #comment, removes code duplication
childDataElements = [
{
'type': 'alienMeta',
@ -670,9 +704,19 @@ ve.dm.Converter.prototype.getDataFromDomRecursion = function ( domElement, wrapp
},
{ 'type': '/alienMeta' }
];
data = data.concat( childDataElements );
processNextWhitespace( childDataElements[0] );
prevElement = childDataElements[0];
if ( context.inWrapper ) {
wrappedMetaItems = wrappedMetaItems.concat( childDataElements );
if ( wrappedWhitespace !== '' ) {
data.splice( wrappedWhitespaceIndex, wrappedWhitespace.length );
addWhitespace( childDataElements[0], 0, wrappedWhitespace );
nextWhitespace = wrappedWhitespace;
wrappedWhitespace = '';
}
} else {
data = data.concat( childDataElements );
processNextWhitespace( childDataElements[0] );
prevElement = childDataElements[0];
}
break;
}
}
@ -879,7 +923,7 @@ ve.dm.Converter.prototype.getDomSubtreeFromData = function ( store, data, contai
// Element
if ( dataElement.type.charAt( 0 ) === '/' ) {
parentDomElement = domElement.parentNode;
isContentNode = this.metaItemFactory.lookup( data[i].type.substr( 1 ) ) ||
isContentNode = !this.metaItemFactory.lookup( data[i].type.substr( 1 ) ) &&
this.nodeFactory.isNodeContent( data[i].type.substr( 1 ) );
// Process whitespace
// whitespace = [ outerPre, innerPre, innerPost, outerPost ]

View file

@ -1673,6 +1673,62 @@ ve.dm.example.domToDataCases = {
{ 'type': '/table' }
]
},
'whitespace preservation with wrapped text, comments and language links': {
'html': '<body><!-- Foo --> <!-- Bar -->\nFoo\n' +
'<link rel="mw:WikiLink/Language" href="http://de.wikipedia.org/wiki/Foo">\n' +
'<link rel="mw:WikiLink/Language" href="http://fr.wikipedia.org/wiki/Foo"></body>',
'data': [
{
'type': 'alienMeta',
'internal': { 'whitespace': [ undefined, undefined, undefined, ' ' ] },
'attributes': {
'style': 'comment',
'text': ' Foo '
}
},
{ 'type': '/alienMeta' },
{
'type': 'alienMeta',
'internal': { 'whitespace': [ ' ', undefined, undefined, '\n' ] },
'attributes': {
'style': 'comment',
'text': ' Bar '
}
},
{ 'type': '/alienMeta' },
{
'type': 'paragraph',
'internal': {
'generated': 'wrapper',
'whitespace': [ '\n', undefined, undefined, '\n' ]
}
},
'F',
'o',
'o',
{ 'type': '/paragraph' },
{
'type': 'MWlanguage',
'attributes': {
'href': 'http://de.wikipedia.org/wiki/Foo',
'html/0/href': 'http://de.wikipedia.org/wiki/Foo',
'html/0/rel': 'mw:WikiLink/Language'
},
'internal': { 'whitespace': [ '\n', undefined, undefined, '\n' ] }
},
{ 'type': '/MWlanguage' },
{
'type': 'MWlanguage',
'attributes': {
'href': 'http://fr.wikipedia.org/wiki/Foo',
'html/0/href': 'http://fr.wikipedia.org/wiki/Foo',
'html/0/rel': 'mw:WikiLink/Language'
},
'internal': { 'whitespace': [ '\n' ] }
},
{ 'type': '/MWlanguage' }
]
},
'mismatching whitespace data is ignored': {
'html': null,
'data': [
@ -2134,8 +2190,10 @@ ve.dm.example.domToDataCases = {
'F',
'o',
'o',
{ 'type': '/paragraph' },
{
'type': 'alienMeta',
'internal': { 'whitespace': [ '\n' ] },
'attributes': {
'style': 'meta',
'key': 'mw:foo',
@ -2145,7 +2203,6 @@ ve.dm.example.domToDataCases = {
}
},
{ 'type': '/alienMeta' },
{ 'type': '/paragraph' },
{ 'type': '/tableCell' },
{ 'type': '/tableRow' },
{ 'type': '/tableSection' },