mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-29 00:30:44 +00:00
085a6f0985
The converter was misbehaving when handling <p>s inside <span>s. This can't be expressed in the linmod, but it would try to anyway. <span><p> would result in too many paragraph closing elements, leading to an exception in ve.dm.Document complaining about unbalanced input. <span>\n<p> would result in an exception in the converter itself while trying to perform whitespace preservation on the newline. This change makes the converter detect these scenarios and alienate the offending node. So <span><p>Foo</p></span> converts to a wrapper paragraph containing an alienInline whose HTML is "<p>Foo</p>" and which is annotated with a TextStyleSpanAnnotation. ve.dm.Converter.getDomFromData(): * Change the criteria for alienBlock vs alienInline ** Only infer from the node type if we're in wrapping mode AND we're at the same level where the wrapping started (wrappingIsOurs). If the latter isn't the case, we can't split the wrapper in the block case because we're at the wrong level. ** Use alienInline not only if the branch is a content branch, but also if there are active annotations. This catches e.g. <li><b><p> (and generally <span><p> on the top level). * Before converting a child element, check that the child isn't "bad". Bad children are non-content children in content branches, and non-content children encountered within a wrapper that we can't split. Only good children are converted, and bad children are alienated (cue Santa/Sinterklaas jokes). * Add childIsContent and rename branchIsContent to branchHasContent Change-Id: If420ae80ab0777424a9a5517335ef9d0170e87ae
1053 lines
36 KiB
JavaScript
1053 lines
36 KiB
JavaScript
/**
|
|
* VisualEditor data model Converter class.
|
|
*
|
|
* @copyright 2011-2012 VisualEditor Team and others; see AUTHORS.txt
|
|
* @license The MIT License (MIT); see LICENSE.txt
|
|
*/
|
|
|
|
/**
|
|
* Converter between HTML DOM and VisualEditor linear data.
|
|
*
|
|
* @class
|
|
* @constructor
|
|
* @param {Object} options Conversion options
|
|
*/
|
|
ve.dm.Converter = function VeDmConverter( nodeFactory, annotationFactory ) {
|
|
// Properties
|
|
this.nodeFactory = nodeFactory;
|
|
this.annotationFactory = annotationFactory;
|
|
this.elements = {
|
|
'toDomElement': {},
|
|
'toDataElement': {},
|
|
'dataElementTypes': {}
|
|
};
|
|
|
|
// Events
|
|
this.nodeFactory.addListenerMethod( this, 'register', 'onNodeRegister' );
|
|
};
|
|
|
|
/* Static Methods */
|
|
|
|
/**
|
|
* Get linear model data from a string optionally applying annotations
|
|
*
|
|
* @param {String} text Plain text to convert
|
|
* @param {Array} [annotations] Array of annotation objects to apply
|
|
* @returns {Array} Linear model data, one element per character
|
|
*/
|
|
ve.dm.Converter.getDataContentFromText = function ( text, annotations ) {
|
|
var characters = text.split( '' ),
|
|
annotationSet = new ve.AnnotationSet(),
|
|
i;
|
|
if ( !annotations || annotations.length === 0 ) {
|
|
return characters;
|
|
}
|
|
// Build annotation set
|
|
for ( i = 0; i < annotations.length; i++ ) {
|
|
annotationSet.push( annotations[i] );
|
|
}
|
|
// Apply annotations to characters
|
|
for ( i = 0; i < characters.length; i++ ) {
|
|
// Make a shallow copy of the annotationSet object, otherwise adding an annotation to one
|
|
// character automatically adds it to all of others as well, annotations should be treated
|
|
// as immutable, so it's OK to share references, but annotation sets are not immutable, so
|
|
// it's not safe to share references - each annotated character needs its own set
|
|
characters[i] = [characters[i], annotationSet.clone()];
|
|
}
|
|
return characters;
|
|
};
|
|
|
|
/* Methods */
|
|
|
|
/**
|
|
* Responds to register events from the node factory.
|
|
*
|
|
* If a node is special; such as document, alienInline, alienBlock and text; its {converters}
|
|
* property should be set to null, as to distinguish it from a new node type that someone has simply
|
|
* forgotten to implement converters for.
|
|
*
|
|
* @method
|
|
* @param {String} type Node type
|
|
* @param {Function} constructor Node constructor
|
|
* @throws 'Missing conversion data in node implementation of {type}'
|
|
*/
|
|
ve.dm.Converter.prototype.onNodeRegister = function ( dataElementType, constructor ) {
|
|
if ( constructor.converters === undefined ) {
|
|
throw new Error( 'Missing conversion data in node implementation of ' + dataElementType );
|
|
} else if ( constructor.converters !== null ) {
|
|
var i,
|
|
domElementTypes = constructor.converters.domElementTypes,
|
|
toDomElement = constructor.converters.toDomElement,
|
|
toDataElement = constructor.converters.toDataElement;
|
|
// Registration
|
|
this.elements.toDomElement[dataElementType] = toDomElement;
|
|
for ( i = 0; i < domElementTypes.length; i++ ) {
|
|
this.elements.toDataElement[domElementTypes[i]] = toDataElement;
|
|
this.elements.dataElementTypes[domElementTypes[i]] = dataElementType;
|
|
}
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Get the DOM element for a given linear model element.
|
|
*
|
|
* This invokes the toDomElement function registered for the element type.
|
|
*
|
|
* @method
|
|
* @param {Object} dataElement Linear model element
|
|
* @returns {HTMLElement|false} DOM element, or false if this element cannot be converted
|
|
*/
|
|
ve.dm.Converter.prototype.getDomElementFromDataElement = function ( dataElement ) {
|
|
var key, domElement, dataElementAttributes, wrapper,
|
|
dataElementType = dataElement.type;
|
|
if ( dataElementType === 'alienInline' || dataElementType === 'alienBlock' ) {
|
|
// Alien
|
|
// Create nodes from source
|
|
wrapper = document.createElement( 'div' );
|
|
wrapper.innerHTML = dataElement.attributes.html;
|
|
if ( wrapper.childNodes.length > 1 ) {
|
|
// Wrap the HTML in a single element, this makes
|
|
// it much easier to deal with. It'll be unwrapped
|
|
// at the end of getDomFromData().
|
|
domElement = document.createElement( 'div' );
|
|
domElement.setAttribute( 'data-ve-multi-child-alien-wrapper', 'true' );
|
|
while ( wrapper.firstChild ) {
|
|
domElement.appendChild( wrapper.firstChild );
|
|
}
|
|
} else {
|
|
domElement = wrapper.firstChild;
|
|
}
|
|
return domElement;
|
|
}
|
|
if ( !( dataElementType in this.elements.toDomElement ) ) {
|
|
// Unsupported element
|
|
return false;
|
|
}
|
|
|
|
domElement = this.elements.toDomElement[dataElementType]( dataElementType, dataElement );
|
|
dataElementAttributes = dataElement.attributes;
|
|
if ( dataElementAttributes ) {
|
|
for ( key in dataElementAttributes ) {
|
|
// Only include 'html/*' attributes and strip the 'html/' from the beginning of the name
|
|
if ( key.indexOf( 'html/' ) === 0 ) {
|
|
domElement.setAttribute( key.substr( 5 ), dataElementAttributes[key] );
|
|
}
|
|
}
|
|
}
|
|
// Change markers
|
|
if (
|
|
dataElement.internal && dataElement.internal.changed &&
|
|
!ve.isEmptyObject( dataElement.internal.changed ) &&
|
|
ve.init.platform.useChangeMarkers()
|
|
) {
|
|
domElement.setAttribute( 'data-ve-changed',
|
|
JSON.stringify( dataElement.internal.changed )
|
|
);
|
|
}
|
|
return domElement;
|
|
};
|
|
|
|
/**
|
|
* Get the linear model data element for a given DOM element.
|
|
*
|
|
* This invokes the toDataElement function registered for the element type
|
|
*
|
|
* @method
|
|
* @param {HTMLElement} domElement DOM element
|
|
* @param {Array} annotations Annotations to apply if the node is a content node
|
|
* @returns {Object|false} Linear model element, or false if this node cannot be converted
|
|
*/
|
|
ve.dm.Converter.prototype.getDataElementFromDomElement = function ( domElement, annotations ) {
|
|
var dataElement, domElementAttributes, dataElementAttributes, domElementAttribute, i,
|
|
annotationSet, domElementType = domElement.nodeName.toLowerCase();
|
|
annotations = annotations || [];
|
|
if (
|
|
// Unsupported elements
|
|
!( domElementType in this.elements.toDataElement )
|
|
// TODO check for generated elements
|
|
) {
|
|
return false;
|
|
}
|
|
dataElement = this.elements.toDataElement[domElementType]( domElementType, domElement );
|
|
domElementAttributes = domElement.attributes;
|
|
if ( domElementAttributes.length ) {
|
|
dataElementAttributes = dataElement.attributes = dataElement.attributes || {};
|
|
// Include all attributes and prepend 'html/' to each attribute name
|
|
for ( i = 0; i < domElementAttributes.length; i++ ) {
|
|
domElementAttribute = domElementAttributes[i];
|
|
dataElementAttributes['html/' + domElementAttribute.name] = domElementAttribute.value;
|
|
}
|
|
}
|
|
if ( ve.dm.nodeFactory.isNodeContent( dataElement.type ) && annotations.length > 0 ) {
|
|
// Build annotation set
|
|
annotationSet = new ve.AnnotationSet();
|
|
for ( i = 0; i < annotations.length; i++ ) {
|
|
annotationSet.push( annotations[i] );
|
|
}
|
|
dataElement.annotations = annotationSet;
|
|
}
|
|
return dataElement;
|
|
};
|
|
|
|
/**
|
|
* Check if an HTML DOM node represents an annotation, and if so, build an annotation object for it.
|
|
*
|
|
* @example Annotation Object
|
|
* { 'type': 'type', data: { 'key': 'value', ... } }
|
|
*
|
|
* @param {HTMLElement} domElement HTML DOM node
|
|
* @returns {Object|false} Annotation object, or false if this node is not an annotation
|
|
*/
|
|
ve.dm.Converter.prototype.getDataAnnotationFromDomElement = function ( domElement ) {
|
|
return ve.dm.annotationFactory.createFromElement( domElement ) || false;
|
|
};
|
|
|
|
/**
|
|
* Build an HTML DOM node for a linear model annotation.
|
|
*
|
|
* @method
|
|
* @param {Object} dataAnnotation Annotation object
|
|
* @returns {HTMLElement} HTML DOM node
|
|
*/
|
|
ve.dm.Converter.prototype.getDomElementFromDataAnnotation = function ( dataAnnotation ) {
|
|
var htmlData = dataAnnotation.toHTML(),
|
|
domElement = document.createElement( htmlData.tag );
|
|
ve.setDomAttributes( domElement, htmlData.attributes );
|
|
return domElement;
|
|
};
|
|
|
|
/**
|
|
* Convert an HTML DOM tree to a linear model.
|
|
*
|
|
* Do not use the annotations, dataElement and path parameters, they're used for internal
|
|
* recursion only.
|
|
*
|
|
* @method
|
|
* @param {HTMLElement} domElement Wrapper div containing the HTML to convert
|
|
* @param {Array} [annotations] Array of annotations (objects) to apply to the generated data
|
|
* @param {Object} [dataElement] Data element to wrap the returned data in
|
|
* @param {Array} [path] Array of linear model element types
|
|
* @param {Boolean} [alreadyWrapped] Whether the caller has already started wrapping bare content in a paragraph
|
|
* @returns {Array} Linear model data
|
|
*/
|
|
ve.dm.Converter.prototype.getDataFromDom = function ( domElement, annotations, dataElement, path, alreadyWrapped ) {
|
|
function createAlien( domElement, branchHasContent, isWrapper ) {
|
|
// If we're in wrapping mode, we don't know if this alien is supposed to be block
|
|
// or inline, so detect it based on the HTML tag name.
|
|
var isInline = ( wrapping && wrappingIsOurs ) ?
|
|
!ve.isBlockElement( domElement ) :
|
|
( branchHasContent || annotations.length > 0 ),
|
|
type = isInline ? 'alienInline' : 'alienBlock',
|
|
html, alien, annotationSet, i;
|
|
if ( isWrapper ) {
|
|
html = $( domElement ).html();
|
|
} else {
|
|
html = $( '<div>' ).append( $( domElement ).clone() ).html();
|
|
}
|
|
alien = [
|
|
{
|
|
'type': type,
|
|
'attributes': {
|
|
'html': html
|
|
}
|
|
},
|
|
{ 'type': '/' + type }
|
|
];
|
|
if ( annotations.length > 0 ) {
|
|
annotationSet = new ve.AnnotationSet();
|
|
for ( i = 0; i < annotations.length; i++ ) {
|
|
annotationSet.push( annotations[i] );
|
|
}
|
|
alien[0].annotations = annotationSet;
|
|
}
|
|
return alien;
|
|
}
|
|
function addWhitespace( element, index, whitespace ) {
|
|
if ( !element.internal ) {
|
|
element.internal = {};
|
|
}
|
|
// whitespace = [ outerPre, innerPre, innerPost, outerPost ]
|
|
// <tag> text </tag> <nextTag>
|
|
// ^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^ ^^^^^^^^^^
|
|
// outerPre innerPre innerPost outerPost
|
|
if ( !element.internal.whitespace ) {
|
|
element.internal.whitespace = [];
|
|
}
|
|
if ( !element.internal.whitespace[index] ) {
|
|
element.internal.whitespace[index] = '';
|
|
}
|
|
element.internal.whitespace[index] = whitespace;
|
|
}
|
|
function processNextWhitespace( element ) {
|
|
// This function uses and changes nextWhitespace in the outer function's scope,
|
|
// which means it's not really a function but more of a shortcut.
|
|
if ( nextWhitespace !== '' ) {
|
|
addWhitespace( element, 0, nextWhitespace );
|
|
nextWhitespace = '';
|
|
}
|
|
}
|
|
function stopWrapping() {
|
|
if ( wrappedWhitespace !== '' ) {
|
|
// Remove wrappedWhitespace from data
|
|
data.splice( -wrappedWhitespace.length, wrappedWhitespace.length );
|
|
addWhitespace( wrappingParagraph, 3, wrappedWhitespace );
|
|
nextWhitespace = wrappedWhitespace;
|
|
}
|
|
data.push( { 'type': '/paragraph' } );
|
|
wrappingParagraph = undefined;
|
|
wrapping = false;
|
|
wrappingIsOurs = false;
|
|
}
|
|
|
|
/**
|
|
* Helper function to group adjacent child elements with the same about attribute together.
|
|
* If there are multiple adjacent child nodes with the same about attribute, they are
|
|
* wrapped in a <div> with the data-ve-aboutgroup attribute set.
|
|
*
|
|
* This function does not wrap single-element about groups, and does not descend into the
|
|
* child elements.
|
|
*
|
|
* @param element {HTMLElement} Element to process
|
|
*/
|
|
function doAboutGrouping( element ) {
|
|
var child = element.firstChild, textNodes = [],
|
|
prevChild, aboutGroup, aboutWrapper, childAbout, nextChild, i;
|
|
while ( child ) {
|
|
nextChild = child.nextSibling;
|
|
if ( !child.getAttribute ) {
|
|
// Text nodes don't have a getAttribute() method. Thanks HTML DOM,
|
|
// that's really helpful ^^
|
|
textNodes.push( child );
|
|
child = nextChild;
|
|
continue;
|
|
}
|
|
childAbout = child.getAttribute( 'about' );
|
|
if ( childAbout && !aboutGroup ) {
|
|
// Start of a new about group
|
|
aboutGroup = childAbout;
|
|
} else if ( childAbout && childAbout === aboutGroup ) {
|
|
// Continuation of the current about group
|
|
if ( !aboutWrapper ) {
|
|
// This is the second child in this group, so the
|
|
// previous child is the first child in this group.
|
|
// Wrap the previous child
|
|
aboutWrapper = document.createElement( 'div' );
|
|
aboutWrapper.setAttribute( 'data-ve-aboutgroup', aboutGroup );
|
|
element.insertBefore( aboutWrapper, prevChild );
|
|
aboutWrapper.appendChild( prevChild );
|
|
}
|
|
// Append any outstanding text nodes to the wrapper
|
|
for ( i = 0; i < textNodes.length; i++ ) {
|
|
aboutWrapper.appendChild( textNodes[i] );
|
|
}
|
|
// Append this child to the wrapper
|
|
aboutWrapper.appendChild( child );
|
|
} else if ( aboutGroup ) {
|
|
// This child isn't in the current about group
|
|
aboutGroup = undefined;
|
|
aboutWrapper = undefined;
|
|
if ( childAbout ) {
|
|
// Start of a new about group
|
|
aboutGroup = childAbout;
|
|
}
|
|
}
|
|
prevChild = child;
|
|
child = nextChild;
|
|
textNodes = [];
|
|
}
|
|
}
|
|
|
|
// Fallback to defaults
|
|
annotations = annotations || [];
|
|
path = path || ['document'];
|
|
var i, j, childDomElement, annotation, childDataElement, text, childTypes, matches,
|
|
wrappingParagraph, prevElement, alien, rdfaType, isLink,
|
|
data = [],
|
|
branchType = path[path.length - 1],
|
|
branchHasContent = ve.dm.nodeFactory.canNodeContainContent( branchType ),
|
|
childIsContent,
|
|
nextWhitespace = '',
|
|
wrappedWhitespace = '',
|
|
wrapping = alreadyWrapped,
|
|
wrappingIsOurs = false;
|
|
// Open element
|
|
if ( dataElement ) {
|
|
data.push( dataElement );
|
|
}
|
|
// Do about grouping
|
|
// FIXME this assumes every about group is an alien
|
|
doAboutGrouping( domElement );
|
|
// Add contents
|
|
for ( i = 0; i < domElement.childNodes.length; i++ ) {
|
|
childDomElement = domElement.childNodes[i];
|
|
switch ( childDomElement.nodeType ) {
|
|
case Node.ELEMENT_NODE:
|
|
// Alienate about groups
|
|
if ( childDomElement.hasAttribute( 'data-ve-aboutgroup' ) ) {
|
|
alien = createAlien( childDomElement, branchHasContent, true );
|
|
if ( wrapping && alien[0].type === 'alienBlock' ) {
|
|
stopWrapping();
|
|
}
|
|
data = data.concat( alien );
|
|
processNextWhitespace( alien[0] );
|
|
prevElement = alien[0];
|
|
break;
|
|
}
|
|
|
|
// HACK handle <meta>/<link> separately because of the
|
|
// metaInline/metaBlock distinction
|
|
if (
|
|
childDomElement.nodeName.toLowerCase() === 'meta' ||
|
|
childDomElement.nodeName.toLowerCase() === 'link'
|
|
) {
|
|
isLink = childDomElement.nodeName.toLowerCase() === 'link';
|
|
childDataElement = {
|
|
'type': branchHasContent ? 'metaInline' : 'metaBlock',
|
|
'attributes': {
|
|
'style': isLink ? 'link' : 'meta',
|
|
'key': childDomElement.getAttribute( isLink ? 'rel' : 'property' )
|
|
}
|
|
};
|
|
if ( childDomElement.hasAttribute( isLink ? 'href' : 'content' ) ) {
|
|
childDataElement.attributes.value = childDomElement.getAttribute( isLink ? 'href' : 'content' );
|
|
}
|
|
// Preserve HTML attributes
|
|
// FIXME the following is duplicated from getDataElementFromDomElement()
|
|
// Include all attributes and prepend 'html/' to each attribute name
|
|
for ( j = 0; j < childDomElement.attributes.length; j++ ) {
|
|
// ..but exclude attributes we've already processed,
|
|
// because they'll be overwritten otherwise *sigh*
|
|
// FIXME this sucks, we need a new node type API so bad
|
|
if (
|
|
childDomElement.attributes[j].name !== ( isLink ? 'rel' : 'property' ) &&
|
|
childDomElement.attributes[j].name !== ( isLink ? 'href' : 'content' )
|
|
) {
|
|
childDataElement.attributes['html/' + childDomElement.attributes[j].name] = childDomElement.attributes[j].value;
|
|
}
|
|
}
|
|
data.push( childDataElement );
|
|
data.push( { 'type': branchHasContent ? '/metaInline' : '/metaBlock' } );
|
|
processNextWhitespace( childDataElement );
|
|
prevElement = childDataElement;
|
|
break;
|
|
}
|
|
// Alienate anything with a mw: type that isn't registered
|
|
// HACK because we don't actually have an RDFa type registry yet,
|
|
// this hardcodes the set of recognized types
|
|
rdfaType = childDomElement.getAttribute( 'rel' ) ||
|
|
childDomElement.getAttribute( 'typeof' ) ||
|
|
childDomElement.getAttribute( 'property' );
|
|
if (
|
|
rdfaType &&
|
|
rdfaType.match( /^mw:/ ) &&
|
|
!rdfaType.match( /^mw:WikiLink/ ) &&
|
|
!rdfaType.match( /^mw:ExtLink/ ) &&
|
|
!rdfaType.match( /^mw:Entity/ )
|
|
) {
|
|
alien = createAlien( childDomElement, branchHasContent );
|
|
if ( wrapping && alien[0].type === 'alienBlock' ) {
|
|
stopWrapping();
|
|
}
|
|
data = data.concat( alien );
|
|
processNextWhitespace( alien[0] );
|
|
prevElement = alien[0];
|
|
break;
|
|
}
|
|
|
|
// Detect and handle annotated content
|
|
// HACK except for mw:Entity. We need a node API rewrite, badly
|
|
annotation = this.getDataAnnotationFromDomElement( childDomElement );
|
|
if ( annotation && rdfaType !== 'mw:Entity' ) {
|
|
// Start auto-wrapping of bare content
|
|
if ( !wrapping && !branchHasContent ) {
|
|
// Mark this paragraph as having been generated by
|
|
// us, so we can strip it on the way out
|
|
wrappingParagraph = {
|
|
'type': 'paragraph',
|
|
'internal': { 'generated': 'wrapper' }
|
|
};
|
|
data.push( wrappingParagraph );
|
|
wrapping = true;
|
|
wrappingIsOurs = true;
|
|
processNextWhitespace( wrappingParagraph );
|
|
prevElement = wrappingParagraph;
|
|
}
|
|
// Append child element data
|
|
data = data.concat(
|
|
this.getDataFromDom(
|
|
childDomElement, annotations.concat( [ annotation ] ), undefined, path, wrapping
|
|
)
|
|
);
|
|
break;
|
|
}
|
|
|
|
// Look up child element type
|
|
childDataElement = this.getDataElementFromDomElement( childDomElement, annotations );
|
|
if ( childDataElement ) {
|
|
childIsContent = ve.dm.nodeFactory.isNodeContent( childDataElement.type );
|
|
// Check that something isn't terribly wrong
|
|
if ( !(
|
|
// Non-content child in a content container
|
|
( branchHasContent && !childIsContent ) ||
|
|
// Non-content child trying to break wrapping at
|
|
// the wrong level
|
|
( wrapping && !wrappingIsOurs && !childIsContent )
|
|
) ) {
|
|
// End auto-wrapping of bare content from a previously processed node
|
|
// but only if childDataElement is a non-content element
|
|
if ( wrapping && wrappingIsOurs && !childIsContent ) {
|
|
stopWrapping();
|
|
}
|
|
if ( ve.dm.nodeFactory.canNodeHaveChildren( childDataElement.type ) ) {
|
|
// Append child element data
|
|
data = data.concat(
|
|
this.getDataFromDom(
|
|
childDomElement,
|
|
[],
|
|
childDataElement,
|
|
path.concat( childDataElement.type ),
|
|
wrapping
|
|
)
|
|
);
|
|
} else {
|
|
// Append empty node
|
|
data.push( childDataElement );
|
|
data.push( { 'type': '/' + childDataElement.type } );
|
|
}
|
|
processNextWhitespace( childDataElement );
|
|
prevElement = childDataElement;
|
|
break;
|
|
}
|
|
// If something is wrong, fall through, and the bad child
|
|
// will be alienated below.
|
|
}
|
|
// We don't know what this is, fall back to alien.
|
|
alien = createAlien( childDomElement, branchHasContent );
|
|
// End auto-wrapping of bare content from a previously processed node
|
|
// but only if we produced an alienBlock
|
|
if ( wrapping && alien[0].type === 'alienBlock' ) {
|
|
stopWrapping();
|
|
}
|
|
data = data.concat( alien );
|
|
processNextWhitespace( alien[0] );
|
|
prevElement = alien[0];
|
|
break;
|
|
case Node.TEXT_NODE:
|
|
text = childDomElement.data;
|
|
if ( text === '' ) {
|
|
// Empty text node?!?
|
|
break;
|
|
}
|
|
if ( !branchHasContent ) {
|
|
// Strip and store outer whitespace
|
|
if ( text.match( /^\s+$/ ) ) {
|
|
// This text node is whitespace only
|
|
if ( wrapping ) {
|
|
// We're already wrapping, so output this whitespace
|
|
// and store it in wrappedWhitespace (see
|
|
// comment about wrappedWhitespace below)
|
|
wrappedWhitespace = text;
|
|
data = data.concat(
|
|
ve.dm.Converter.getDataContentFromText( wrappedWhitespace, annotations )
|
|
);
|
|
} else {
|
|
// We're not in wrapping mode, store this whitespace
|
|
if ( !prevElement ) {
|
|
if ( dataElement ) {
|
|
// First child, store as inner
|
|
// whitespace in the parent
|
|
addWhitespace( dataElement, 1, text );
|
|
}
|
|
// Else, WTF?!? This is not supposed to
|
|
// happen, but it's not worth
|
|
// throwing an exception over.
|
|
} else {
|
|
addWhitespace( prevElement, 3, text );
|
|
}
|
|
nextWhitespace = text;
|
|
wrappedWhitespace = '';
|
|
}
|
|
// We're done, no actual text left to process
|
|
break;
|
|
} else {
|
|
// This text node contains actual text
|
|
// Separate the real text from the whitespace
|
|
// HACK: . doesn't match newlines in JS, so use
|
|
// [\s\S] to match any character
|
|
matches = text.match( /^(\s*)([\s\S]*?)(\s*)$/ );
|
|
if ( !wrapping ) {
|
|
// Wrap the text in a paragraph and output it
|
|
// Mark this paragraph as having been generated by
|
|
// us, so we can strip it on the way out
|
|
wrappingParagraph = {
|
|
'type': 'paragraph',
|
|
'internal': { 'generated': 'wrapper' }
|
|
};
|
|
processNextWhitespace( wrappingParagraph );
|
|
data.push( wrappingParagraph );
|
|
wrapping = true;
|
|
wrappingIsOurs = true;
|
|
|
|
// Only store leading whitespace if we just
|
|
// started wrapping
|
|
if ( matches[1] !== '' ) {
|
|
if ( !prevElement ) {
|
|
if ( dataElement ) {
|
|
// First child, store as inner
|
|
// whitespace in the parent
|
|
addWhitespace( dataElement, 1, matches[1] );
|
|
}
|
|
// Else, WTF?!? This is not supposed to
|
|
// happen, but it's not worth
|
|
// throwing an exception over.
|
|
} else {
|
|
addWhitespace( prevElement, 3, matches[1] );
|
|
}
|
|
addWhitespace( wrappingParagraph, 0, matches[1] );
|
|
}
|
|
} else {
|
|
// We were already wrapping in a paragraph,
|
|
// so the leading whitespace must be output
|
|
data = data.concat(
|
|
ve.dm.Converter.getDataContentFromText( matches[1], annotations )
|
|
);
|
|
}
|
|
// Output the text sans whitespace
|
|
data = data.concat(
|
|
ve.dm.Converter.getDataContentFromText( matches[2], annotations )
|
|
);
|
|
|
|
// Don't store this in wrappingParagraph.internal.whitespace[3]
|
|
// and nextWhitespace just yet. Instead, store it
|
|
// in wrappedWhitespace. There might be more text
|
|
// nodes after this one, so we output wrappedWhitespace
|
|
// for now and undo that if it turns out this was
|
|
// the last text node. We can't output it later
|
|
// because we have to apply the correct annotations.
|
|
wrappedWhitespace = matches[3];
|
|
data = data.concat(
|
|
ve.dm.Converter.getDataContentFromText( wrappedWhitespace, annotations )
|
|
);
|
|
prevElement = wrappingParagraph;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Strip leading and trailing inner whitespace
|
|
// (but only in non-annotation nodes)
|
|
// and store it so it can be restored later.
|
|
if (
|
|
annotations.length === 0 && i === 0 && dataElement &&
|
|
!ve.dm.nodeFactory.doesNodeHaveSignificantWhitespace( dataElement.type )
|
|
) {
|
|
// Strip leading whitespace from the first child
|
|
matches = text.match( /^\s+/ );
|
|
if ( matches && matches[0] !== '' ) {
|
|
addWhitespace( dataElement, 1, matches[0] );
|
|
text = text.substring( matches[0].length );
|
|
}
|
|
}
|
|
if (
|
|
annotations.length === 0 &&
|
|
i === domElement.childNodes.length - 1 &&
|
|
dataElement &&
|
|
!ve.dm.nodeFactory.doesNodeHaveSignificantWhitespace( dataElement.type )
|
|
) {
|
|
// Strip trailing whitespace from the last child
|
|
matches = text.match( /\s+$/ );
|
|
if ( matches && matches[0] !== '' ) {
|
|
addWhitespace( dataElement, 2, matches[0] );
|
|
text = text.substring( 0,
|
|
text.length - matches[0].length );
|
|
}
|
|
}
|
|
|
|
// Annotate the text and output it
|
|
data = data.concat(
|
|
ve.dm.Converter.getDataContentFromText( text, annotations )
|
|
);
|
|
break;
|
|
case Node.COMMENT_NODE:
|
|
childDataElement = {
|
|
'type': branchHasContent ? 'metaInline' : 'metaBlock',
|
|
'attributes': {
|
|
'style': 'comment',
|
|
'text': childDomElement.data
|
|
}
|
|
};
|
|
data.push( childDataElement );
|
|
data.push( { 'type': branchHasContent ? '/metaInline' : '/metaBlock' } );
|
|
processNextWhitespace( childDataElement );
|
|
prevElement = childDataElement;
|
|
break;
|
|
}
|
|
}
|
|
// End auto-wrapping of bare content
|
|
if ( wrapping && wrappingIsOurs ) {
|
|
stopWrapping();
|
|
// Don't set wrapping = false here because it's checked below
|
|
wrapping = true;
|
|
}
|
|
|
|
// If we're closing a node that doesn't have any children, but could contain a paragraph,
|
|
// add a paragraph. This prevents things like empty list items
|
|
childTypes = ve.dm.nodeFactory.getChildNodeTypes( branchType );
|
|
if ( branchType !== 'paragraph' && dataElement && data[data.length - 1] === dataElement &&
|
|
!wrapping && !ve.dm.nodeFactory.canNodeContainContent( branchType ) &&
|
|
!ve.dm.nodeFactory.isNodeContent( branchType ) &&
|
|
( childTypes === null || ve.indexOf( 'paragraph', childTypes ) !== -1 )
|
|
) {
|
|
data.push( { 'type': 'paragraph', 'internal': { 'generated': 'empty' } } );
|
|
data.push( { 'type': '/paragraph' } );
|
|
}
|
|
|
|
// Close element
|
|
if ( dataElement ) {
|
|
data.push( { 'type': '/' + dataElement.type } );
|
|
// Add the whitespace after the last child to the parent as innerPost
|
|
if ( nextWhitespace !== '' ) {
|
|
addWhitespace( dataElement, 2, nextWhitespace );
|
|
nextWhitespace = '';
|
|
}
|
|
}
|
|
// Don't return an empty document
|
|
if ( branchType === 'document' && data.length === 0 ) {
|
|
return [
|
|
{ 'type': 'paragraph', 'internal': { 'generated': 'empty' } },
|
|
{ 'type': '/paragraph' }
|
|
];
|
|
}
|
|
return data;
|
|
};
|
|
|
|
/**
|
|
* Convert linear model data to an HTML DOM
|
|
*
|
|
* @method
|
|
* @param {Array} data Linear model data
|
|
* @returns {HTMLElement} Wrapper div containing the resulting HTML
|
|
*/
|
|
ve.dm.Converter.prototype.getDomFromData = function ( data ) {
|
|
var text, i, j, k, annotations, annotation, annotationElement, dataElement, arr,
|
|
childDomElement, pre, ours, theirs, parentDomElement, startClosingAt,
|
|
isContentNode, changed, parentChanged,
|
|
container = document.createElement( 'div' ),
|
|
domElement = container,
|
|
annotationStack = new ve.AnnotationSet();
|
|
|
|
for ( i = 0; i < data.length; i++ ) {
|
|
if ( typeof data[i] === 'string' ) {
|
|
// Text
|
|
text = '';
|
|
// Continue forward as far as the plain text goes
|
|
while ( typeof data[i] === 'string' ) {
|
|
text += data[i];
|
|
i++;
|
|
}
|
|
// i points to the first non-text thing, go back one so we don't skip this later
|
|
i--;
|
|
// Add text
|
|
domElement.appendChild( document.createTextNode( text ) );
|
|
} else if (
|
|
ve.isArray( data[i] ) ||
|
|
(
|
|
data[i].annotations !== undefined &&
|
|
ve.dm.nodeFactory.isNodeContent( data[i].type )
|
|
)
|
|
) {
|
|
// Annotated text or annotated nodes
|
|
text = '';
|
|
while (
|
|
ve.isArray( data[i] ) ||
|
|
(
|
|
data[i].annotations !== undefined &&
|
|
ve.dm.nodeFactory.isNodeContent( data[i].type )
|
|
)
|
|
) {
|
|
annotations = data[i].annotations || data[i][1];
|
|
// Close annotations as needed
|
|
// Go through annotationStack from bottom to top (low to high),
|
|
// and find the first annotation that's not in annotations.
|
|
startClosingAt = undefined;
|
|
arr = annotationStack.get();
|
|
for ( j = 0; j < arr.length; j++ ) {
|
|
annotation = arr[j];
|
|
if ( !annotations.contains( annotation ) ) {
|
|
startClosingAt = j;
|
|
break;
|
|
}
|
|
}
|
|
if ( startClosingAt !== undefined ) {
|
|
// Close all annotations from top to bottom (high to low)
|
|
// until we reach startClosingAt
|
|
for ( j = annotationStack.getLength() - 1; j >= startClosingAt; j-- ) {
|
|
// Add text if needed
|
|
if ( text.length > 0 ) {
|
|
domElement.appendChild( document.createTextNode( text ) );
|
|
text = '';
|
|
}
|
|
// Traverse up
|
|
domElement = domElement.parentNode;
|
|
// Remove from annotationStack
|
|
annotationStack.removeAt( j );
|
|
}
|
|
}
|
|
|
|
// Open annotations as needed
|
|
arr = annotations.get();
|
|
for ( j = 0; j < arr.length; j++ ) {
|
|
annotation = arr[j];
|
|
if ( !annotationStack.contains( annotation ) ) {
|
|
// Add text if needed
|
|
if ( text.length > 0 ) {
|
|
domElement.appendChild( document.createTextNode( text ) );
|
|
text = '';
|
|
}
|
|
// Create new node and descend into it
|
|
annotationElement = this.getDomElementFromDataAnnotation( annotation );
|
|
domElement.appendChild( annotationElement );
|
|
domElement = annotationElement;
|
|
// Add to annotationStack
|
|
annotationStack.push( annotation );
|
|
}
|
|
}
|
|
|
|
if ( data[i].annotations === undefined ) {
|
|
// Annotated text
|
|
text += data[i][0];
|
|
} else {
|
|
// Annotated node
|
|
// Add text if needed
|
|
if ( text.length > 0 ) {
|
|
domElement.appendChild( document.createTextNode( text ) );
|
|
text = '';
|
|
}
|
|
// Insert the element
|
|
domElement.appendChild( this.getDomElementFromDataElement( data[i] ) );
|
|
// Increment i once more so we skip over the closing as well
|
|
i++;
|
|
}
|
|
i++;
|
|
}
|
|
// We're now at the first non-annotated thing, go back one so we don't skip this later
|
|
i--;
|
|
|
|
// Add any gathered text
|
|
if ( text.length > 0 ) {
|
|
domElement.appendChild( document.createTextNode( text ) );
|
|
text = '';
|
|
}
|
|
// Close any remaining annotation nodes
|
|
for ( j = annotationStack.getLength() - 1; j >= 0; j-- ) {
|
|
// Traverse up
|
|
domElement = domElement.parentNode;
|
|
}
|
|
// Clear annotationStack
|
|
annotationStack = new ve.AnnotationSet();
|
|
} else if ( data[i].type !== undefined ) {
|
|
dataElement = data[i];
|
|
// Element
|
|
if ( dataElement.type.charAt( 0 ) === '/' ) {
|
|
parentDomElement = domElement.parentNode;
|
|
isContentNode = ve.dm.nodeFactory.isNodeContent( data[i].type.substr( 1 ) );
|
|
// Process whitespace
|
|
// whitespace = [ outerPre, innerPre, innerPost, outerPost ]
|
|
if (
|
|
!isContentNode &&
|
|
domElement.veInternal &&
|
|
domElement.veInternal.whitespace
|
|
) {
|
|
// Process inner whitespace. innerPre is for sure legitimate
|
|
// whitespace that should be inserted; if it was a duplicate
|
|
// of our child's outerPre, we would have cleared it.
|
|
pre = domElement.veInternal.whitespace[1];
|
|
if ( pre ) {
|
|
if (
|
|
domElement.firstChild &&
|
|
domElement.firstChild.nodeType === 3
|
|
) {
|
|
// First child is a TextNode, prepend to it
|
|
domElement.firstChild.insertData( 0, pre );
|
|
} else {
|
|
// Prepend a TextNode
|
|
domElement.insertBefore(
|
|
document.createTextNode( pre ),
|
|
domElement.firstChild
|
|
);
|
|
}
|
|
}
|
|
ours = domElement.veInternal.whitespace[2];
|
|
if ( domElement.lastOuterPost === undefined ) {
|
|
// This node didn't have any structural children
|
|
// (i.e. it's a content-containing node), so there's
|
|
// nothing to check innerPost against
|
|
theirs = ours;
|
|
} else {
|
|
theirs = domElement.lastOuterPost;
|
|
}
|
|
if ( ours && ours === theirs ) {
|
|
if (
|
|
domElement.lastChild &&
|
|
domElement.lastChild.nodeType === 3
|
|
) {
|
|
// Last child is a TextNode, append to it
|
|
domElement.lastChild.appendData( ours );
|
|
} else {
|
|
// Append a TextNode
|
|
domElement.appendChild(
|
|
document.createTextNode( ours )
|
|
);
|
|
}
|
|
}
|
|
// Tell the parent about our outerPost
|
|
parentDomElement.lastOuterPost = domElement.veInternal.whitespace[3] || '';
|
|
} else if ( !isContentNode ) {
|
|
// Use empty string, because undefined means there were no
|
|
// structural children
|
|
parentDomElement.lastOuterPost = '';
|
|
}
|
|
// else don't touch lastOuterPost
|
|
|
|
// If closing a generated wrapper node, unwrap it
|
|
// It would be nicer if we could avoid generating in the first
|
|
// place, but then remembering where we have to skip ascending
|
|
// to the parent would be tricky.
|
|
// We unwrap all nodes with generated=wrapper, as well as nodes that
|
|
// have generated=empty and are empty.
|
|
if (
|
|
domElement.veInternal && (
|
|
domElement.veInternal.generated === 'wrapper' || (
|
|
domElement.veInternal.generated === 'empty' &&
|
|
domElement.childNodes.length === 0
|
|
)
|
|
)
|
|
) {
|
|
while ( domElement.firstChild ) {
|
|
parentDomElement.insertBefore(
|
|
domElement.firstChild,
|
|
domElement
|
|
);
|
|
}
|
|
// Transfer change markers
|
|
changed = domElement.getAttribute( 'data-ve-changed' );
|
|
if ( changed ) {
|
|
parentChanged = parentDomElement.getAttribute( 'data-ve-changed' );
|
|
if ( parentChanged ) {
|
|
changed = $.parseJSON( changed );
|
|
parentChanged = $.parseJSON( parentChanged );
|
|
for ( k in changed ) {
|
|
if ( k in parentChanged ) {
|
|
parentChanged[k] += changed[k];
|
|
} else {
|
|
parentChanged[k] = changed[k];
|
|
}
|
|
}
|
|
parentDomElement.setAttribute( 'data-ve-changed',
|
|
$.toJSON( parentChanged ) );
|
|
} else {
|
|
parentDomElement.setAttribute( 'data-ve-changed',
|
|
changed );
|
|
}
|
|
}
|
|
parentDomElement.removeChild( domElement );
|
|
}
|
|
|
|
delete domElement.veInternal;
|
|
delete domElement.lastOuterPost;
|
|
// Ascend to parent node
|
|
domElement = parentDomElement;
|
|
} else {
|
|
// Create node from data
|
|
childDomElement = this.getDomElementFromDataElement( dataElement );
|
|
// Add reference to internal data
|
|
if ( dataElement.internal ) {
|
|
childDomElement.veInternal = dataElement.internal;
|
|
}
|
|
// Add element
|
|
domElement.appendChild( childDomElement );
|
|
// Descend into child node
|
|
parentDomElement = domElement;
|
|
domElement = childDomElement;
|
|
|
|
// Process outer whitespace
|
|
// Every piece of outer whitespace is duplicated somewhere:
|
|
// each node's outerPost is duplicated as the next node's
|
|
// outerPre, the first node's outerPre is the parent's
|
|
// innerPre, and the last node's outerPost is the parent's
|
|
// innerPost. For each piece of whitespace, we verify that
|
|
// the duplicate matches. If it doesn't, we take that to
|
|
// mean the user has messed with it and don't output any
|
|
// whitespace.
|
|
if ( domElement.veInternal && domElement.veInternal.whitespace ) {
|
|
// Process this node's outerPre
|
|
ours = domElement.veInternal.whitespace[0];
|
|
theirs = undefined;
|
|
if ( domElement.previousSibling ) {
|
|
// Get previous sibling's outerPost
|
|
theirs = parentDomElement.lastOuterPost;
|
|
} else if ( parentDomElement === container ) {
|
|
// outerPre of the very first node in the document, this one
|
|
// has no duplicate
|
|
theirs = ours;
|
|
} else {
|
|
// First child, get parent's innerPre
|
|
if (
|
|
parentDomElement.veInternal &&
|
|
parentDomElement.veInternal.whitespace
|
|
) {
|
|
theirs = parentDomElement.veInternal.whitespace[1];
|
|
// Clear after use so it's not used twice
|
|
parentDomElement.veInternal.whitespace[1] = undefined;
|
|
}
|
|
// else theirs=undefined
|
|
}
|
|
if ( ours && ours === theirs ) {
|
|
// Matches the duplicate, insert a TextNode
|
|
parentDomElement.insertBefore(
|
|
document.createTextNode( ours ),
|
|
domElement
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Process the outerPost whitespace of the very last node
|
|
if ( container.lastOuterPost !== undefined ) {
|
|
if ( container.lastChild && container.lastChild.nodeType === 3 ) {
|
|
// Last child is a TextNode, append to it
|
|
container.lastChild.appendData( container.lastOuterPost );
|
|
} else {
|
|
// Append a TextNode
|
|
container.appendChild( document.createTextNode( container.lastOuterPost ) );
|
|
}
|
|
delete container.lastOuterPost;
|
|
}
|
|
|
|
// Unwrap multi-child alien wrappers
|
|
$( container ).find( '[data-ve-multi-child-alien-wrapper]' ).each( function() {
|
|
$( this ).replaceWith( $( this ).contents() );
|
|
} );
|
|
|
|
// Workaround for bug 42469: if a <pre> starts with a newline, that means .innerHTML will
|
|
// screw up and stringify it with one fewer newline. Work around this by adding a newline.
|
|
// If we don't see a leading newline, we still don't know if the original HTML was
|
|
// <pre>Foo</pre> or <pre>\nFoo</pre> , but that's a syntactic difference, not a semantic
|
|
// one, and handling that is Parsoid's job.
|
|
$( container ).find( 'pre' ).each( function() {
|
|
var matches;
|
|
if ( this.firstChild.nodeType === Node.TEXT_NODE ) {
|
|
matches = this.firstChild.data.match( /^(\r\n|\r|\n)/ );
|
|
if ( matches && matches[1] ) {
|
|
// Prepend a newline exactly like the one we saw
|
|
this.firstChild.insertData( 0, matches[1] );
|
|
}
|
|
}
|
|
} );
|
|
return container;
|
|
};
|
|
|
|
/* Initialization */
|
|
|
|
ve.dm.converter = new ve.dm.Converter( ve.dm.nodeFactory, ve.dm.annotationFactory );
|