mediawiki-extensions-Visual.../modules/ve/dm/ve.dm.Converter.js
Catrope 68c5430872 Fix weird bug that was suddenly happening on the live site
When closing annotation nodes, we weren't popping them off
annotationStack. Not sure where this came from, but the code was
definitely bad and this fixes it.

Change-Id: I6d805e9aca3778666212135f76ff34c6baacbbc8
2012-08-06 13:51:51 -07:00

647 lines
22 KiB
JavaScript

/**
* VisualEditor data model Converter class.
*
* @copyright 2011-2012 VisualEditor Team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
/**
 * Converter between HTML DOM and VisualEditor linear data.
 *
 * Keeps lookup tables of conversion functions for elements and annotations, and subscribes to
 * the 'register' event of both factories through onNodeRegister and onAnnotationRegister.
 *
 * @class
 * @constructor
 * @param {Object} nodeFactory Factory whose 'register' events are routed to onNodeRegister
 * @param {Object} annotationFactory Factory whose 'register' events are routed to
 *  onAnnotationRegister
 */
ve.dm.Converter = function( nodeFactory, annotationFactory ) {
	// Factories this converter is bound to
	this.nodeFactory = nodeFactory;
	this.annotationFactory = annotationFactory;

	// Element conversion tables; toDomElement is keyed by linear model type, the other two by
	// lowercase DOM element name
	this.elements = {
		toDomElement: {},
		toDataElement: {},
		dataElementTypes: {}
	};

	// Annotation conversion tables; toDomElement is keyed by annotation type, toDataAnnotation
	// by lowercase DOM element name
	this.annotations = {
		toDomElement: {},
		toDataAnnotation: {}
	};

	// Subscribe to registrations on both factories
	nodeFactory.addListenerMethod( this, 'register', 'onNodeRegister' );
	annotationFactory.addListenerMethod( this, 'register', 'onAnnotationRegister' );
};
/* Static Methods */
/**
 * Turn a string into linear model content, optionally annotating every character.
 *
 * Without annotations the result is simply the array of characters. With annotations, every
 * item is a [character, annotationMap] pair, where annotationMap is keyed by ve.getHash() of
 * each annotation.
 *
 * NOTE: an annotation whose data property is an empty object has that property deleted in
 * place before it is hashed.
 *
 * @param {String} text Plain text to convert
 * @param {Array} [annotations] Array of annotation objects to apply
 * @returns {Array} Linear model data, one element per character
 */
ve.dm.Converter.getDataContentFromText = function( text, annotations ) {
	var content = text.split( '' ),
		byHash = {},
		annotation, index, count;

	// Nothing to apply: plain characters are valid linear model content
	if ( !annotations || annotations.length === 0 ) {
		return content;
	}

	// Index the annotations by hash, dropping empty data properties on the way
	for ( index = 0, count = annotations.length; index < count; index++ ) {
		annotation = annotations[index];
		if ( annotation.data && ve.isEmptyObject( annotation.data ) ) {
			delete annotation.data;
		}
		byHash[ve.getHash( annotation )] = annotation;
	}

	// Pair every character with its own shallow copy of the map. The annotation objects
	// themselves are shared, but the maps must not be: a shared map would make a later change
	// to one character's annotations show up on all the others.
	for ( index = 0, count = content.length; index < count; index++ ) {
		content[index] = [ content[index], ve.extendObject( {}, byHash ) ];
	}
	return content;
};
/* Methods */
/**
 * Responds to register events from the node factory.
 *
 * Records the node's conversion functions: toDomElement under the linear model type, and
 * toDataElement plus the linear model type under each DOM element name the node handles.
 *
 * If a node is special; such as document, alienInline, alienBlock and text; its {converters}
 * property should be set to null, as to distinguish it from a new node type that someone has
 * simply forgotten to implement converters for. Such nodes are skipped.
 *
 * @method
 * @param {String} dataElementType Node type
 * @param {Function} constructor Node constructor
 * @throws 'Missing conversion data in node implementation of {type}'
 */
ve.dm.Converter.prototype.onNodeRegister = function( dataElementType, constructor ) {
	var converters = constructor.converters,
		domNames, toDom, toData, domName, index;

	if ( converters === undefined ) {
		throw 'Missing conversion data in node implementation of ' + dataElementType;
	}
	if ( converters === null ) {
		// Explicitly opted out of conversion
		return;
	}

	domNames = converters.domElementTypes;
	toDom = converters.toDomElement;
	toData = converters.toDataElement;

	// Linear model -> DOM
	this.elements.toDomElement[dataElementType] = toDom;
	// DOM -> linear model, one entry per handled element name
	for ( index = 0; index < domNames.length; index++ ) {
		domName = domNames[index];
		this.elements.toDataElement[domName] = toData;
		this.elements.dataElementTypes[domName] = dataElementType;
	}
};
/**
 * Responds to register events from the annotation factory.
 *
 * Records toDomElement under the annotation type and toDataAnnotation under each DOM element
 * name the annotation handles. Annotations whose {converters} property is null are skipped.
 *
 * @method
 * @param {String} dataElementType Base annotation type
 * @param {Function} constructor Annotation constructor
 * @throws 'Missing conversion data in annotation implementation of {type}'
 */
ve.dm.Converter.prototype.onAnnotationRegister = function( dataElementType, constructor ) {
	var converters = constructor.converters,
		domNames, toDom, toAnnotation, index;

	if ( converters === undefined ) {
		throw 'Missing conversion data in annotation implementation of ' + dataElementType;
	}
	if ( converters === null ) {
		// Explicitly opted out of conversion
		return;
	}

	domNames = converters.domElementTypes;
	toDom = converters.toDomElement;
	toAnnotation = converters.toDataAnnotation;

	// Annotation -> DOM
	this.annotations.toDomElement[dataElementType] = toDom;
	// DOM -> annotation, one entry per handled element name
	for ( index = 0; index < domNames.length; index++ ) {
		this.annotations.toDataAnnotation[domNames[index]] = toAnnotation;
	}
};
/**
 * Get the DOM element for a given linear model element.
 *
 * Calls the toDomElement function registered for the element's type, then copies every
 * attribute whose name starts with 'html/' onto the result, with that prefix removed.
 * NOTE: alienBlock and alienInline elements are not supported, if you pass them this function
 * will return false.
 *
 * @method
 * @param {Object} dataElement Linear model element
 * @returns {HTMLElement|false} DOM element, or false if this element cannot be converted
 */
ve.dm.Converter.prototype.getDomElementFromDataElement = function( dataElement ) {
	var prefix = 'html/',
		type = dataElement.type,
		element, attributes, name;

	// Aliens are never converted here
	if ( type === 'alienInline' || type === 'alienBlock' ) {
		return false;
	}
	// Neither are types without a registered converter
	if ( !( type in this.elements.toDomElement ) ) {
		return false;
	}

	element = this.elements.toDomElement[type]( type, dataElement );

	attributes = dataElement.attributes;
	if ( !attributes ) {
		return element;
	}
	for ( name in attributes ) {
		// Only 'html/*' attributes are carried over, minus the 'html/' prefix
		if ( name.indexOf( prefix ) === 0 ) {
			element.setAttribute( name.slice( prefix.length ), attributes[name] );
		}
	}
	return element;
};
/**
 * Get the linear model data element for a given DOM element.
 *
 * This invokes the toDataElement function registered for the element's lowercase node name,
 * after checking that there is no data-mw-gc attribute. Every DOM attribute is then copied into
 * the data element's attributes with 'html/' prepended to its name.
 *
 * @method
 * @param {HTMLElement} domElement DOM element
 * @returns {Object|false} Linear model element, or false if this node cannot be converted
 */
ve.dm.Converter.prototype.getDataElementFromDomElement = function( domElement ) {
	var dataElement, domElementAttributes, dataElementAttributes, domElementAttribute, i,
		domElementType = domElement.nodeName.toLowerCase();
	if (
		// Generated elements
		domElement.hasAttribute( 'data-mw-gc' ) ||
		// Unsupported elements. The element name comes straight from the document, so only own
		// properties of the registry count: an `in` test would also accept names inherited from
		// Object.prototype (such as 'constructor') and call the wrong function.
		!Object.prototype.hasOwnProperty.call( this.elements.toDataElement, domElementType )
	) {
		return false;
	}
	dataElement = this.elements.toDataElement[domElementType]( domElementType, domElement );
	domElementAttributes = domElement.attributes;
	if ( domElementAttributes.length ) {
		dataElementAttributes = dataElement.attributes = dataElement.attributes || {};
		// Include all attributes and prepend 'html/' to each attribute name
		for ( i = 0; i < domElementAttributes.length; i++ ) {
			domElementAttribute = domElementAttributes[i];
			dataElementAttributes['html/' + domElementAttribute.name] = domElementAttribute.value;
		}
	}
	return dataElement;
};
/**
 * Check if an HTML DOM node represents an annotation, and if so, build an annotation object for it.
 *
 * The toDataAnnotation function registered for the node's lowercase node name does the actual
 * conversion.
 *
 * @example Annotation Object
 * { 'type': 'type', data: { 'key': 'value', ... } }
 *
 * @param {HTMLElement} domElement HTML DOM node
 * @returns {Object|false} Annotation object, or false if this node is not an annotation
 */
ve.dm.Converter.prototype.getDataAnnotationFromDomElement = function( domElement ) {
	var domElementType = domElement.nodeName.toLowerCase(),
		registry = this.annotations.toDataAnnotation,
		toDataAnnotation;
	// The element name comes straight from the document, so only own properties of the registry
	// count. A plain lookup would find inherited members such as Object.prototype.constructor,
	// which is a function, and a <constructor> element would be treated as an annotation.
	if ( !Object.prototype.hasOwnProperty.call( registry, domElementType ) ) {
		return false;
	}
	toDataAnnotation = registry[domElementType];
	if ( typeof toDataAnnotation === 'function' ) {
		return toDataAnnotation( domElementType, domElement );
	}
	return false;
};
/**
 * Build an HTML DOM node for a linear model annotation.
 *
 * The annotation type is split at its first slash: the part before it selects the registered
 * toDomElement function, and everything after it is passed on as the sub-type (an empty string
 * if there is no slash).
 *
 * @method
 * @param {Object} dataAnnotation Annotation object
 * @returns {HTMLElement|false} HTML DOM node, or false if this annotation is not known
 */
ve.dm.Converter.prototype.getDomElementFromDataAnnotation = function( dataAnnotation ) {
	var type = dataAnnotation.type,
		slash = type.indexOf( '/' ),
		// Split on the first slash only. String.prototype.split() with a limit drops the
		// remainder instead of keeping it in the last piece, so 'a/b/c'.split( '/', 2 ) would
		// lose the '/c' part of the sub-type.
		baseType = slash === -1 ? type : type.substr( 0, slash ),
		subType = slash === -1 ? '' : type.substr( slash + 1 ),
		registry = this.annotations.toDomElement,
		toDomElement;
	// Only own properties of the registry are registered converters; inherited members of
	// Object.prototype must not be mistaken for one
	if ( !Object.prototype.hasOwnProperty.call( registry, baseType ) ) {
		return false;
	}
	toDomElement = registry[baseType];
	if ( typeof toDomElement === 'function' ) {
		return toDomElement( subType, dataAnnotation );
	}
	return false;
};
/**
 * Convert an HTML DOM tree to a linear model.
 *
 * Walks the child nodes of domElement and appends linear model data for each of them:
 * - elements carrying a data-mw-gc attribute, and elements that are neither annotations nor
 *   registered node types, become alien nodes holding their HTML
 * - annotation elements are recursed into with their annotation added to the active set
 * - registered elements are recursed into with their data element as the wrapper
 * - text nodes become (annotated) characters, after some newline stripping
 * - comment nodes are dropped
 * Content found directly inside a branch that cannot contain content is automatically wrapped in
 * a paragraph.
 *
 * Do not use the annotations, dataElement, path and alreadyWrapped parameters, they're used for
 * internal recursion only.
 *
 * @method
 * @param {HTMLElement} domElement Wrapper div containing the HTML to convert
 * @param {Array} [annotations] Array of annotations (objects) to apply to the generated data
 * @param {Object} [dataElement] Data element to wrap the returned data in
 * @param {Array} [path] Array of linear model element types
 * @param {Boolean} [alreadyWrapped] Whether a call further up the recursion has already opened an
 *  auto-wrapping paragraph around this content
 * @returns {Array} Linear model data
 */
ve.dm.Converter.prototype.getDataFromDom = function( domElement, annotations, dataElement, path, alreadyWrapped ) {
	// Build the opening and closing elements of an alien node that stores the outer HTML of
	// domElement in its 'html' attribute
	function createAlien( domElement, isInline ) {
		var type = isInline ? 'alienInline' : 'alienBlock';
		return [
			{
				'type': type,
				'attributes': {
					'html': $( '<div>' ).append( $( domElement ).clone() ).html()
				}
			},
			{ 'type': '/' + type }
		];
	}
	// Fallback to defaults
	annotations = annotations || [];
	path = path || ['document'];
	var i, childDomElement, annotation, childDataElement, text, contentNode, childTypes,
		data = [],
		branchType = path[path.length - 1],
		branchIsContent = ve.dm.nodeFactory.canNodeContainContent( branchType ),
		wrapping = false;
	// Open element
	if ( dataElement ) {
		data.push( dataElement );
	}
	// Add contents
	for ( i = 0; i < domElement.childNodes.length; i++ ) {
		childDomElement = domElement.childNodes[i];
		switch ( childDomElement.nodeType ) {
			case Node.ELEMENT_NODE:
				// Detect generated content and wrap it in an alien node
				if ( childDomElement.hasAttribute( 'data-mw-gc' ) ) {
					// FIXME Parsoid outputs RDFa now, address this in API rewrite
					data = data.concat( createAlien( childDomElement, branchIsContent ) );
					break;
				}
				// Detect and handle annotated content
				annotation = this.getDataAnnotationFromDomElement( childDomElement );
				if ( annotation ) {
					// Start auto-wrapping of bare content
					if ( !wrapping && !alreadyWrapped && !branchIsContent ) {
						data.push( { 'type': 'paragraph' } );
						wrapping = true;
					}
					// Append child element data; the path is unchanged because an annotation is
					// not a node in the linear model
					data = data.concat(
						this.getDataFromDom(
							childDomElement, annotations.concat( annotation ), undefined, path, wrapping || alreadyWrapped
						)
					);
					break;
				}
				// End auto-wrapping of bare content
				if ( wrapping ) {
					data.push( { 'type': '/paragraph' } );
					wrapping = false;
				}
				// Append child element data
				childDataElement = this.getDataElementFromDomElement( childDomElement );
				if ( childDataElement ) {
					data = data.concat(
						this.getDataFromDom(
							childDomElement,
							[],
							childDataElement,
							path.concat( childDataElement.type ),
							wrapping || alreadyWrapped
						)
					);
					break;
				}
				// We don't know what this is, fall back to alien
				data = data.concat( createAlien( childDomElement, branchIsContent ) );
				break;
			case Node.TEXT_NODE:
				// HACK: strip trailing newlines in <li> tags. Workaround for a Parsoid bug
				text = childDomElement.data;
				if ( domElement.nodeName.toLowerCase() === 'li' ) {
					text = text.replace( /\n+$/, '' );
				}
				if ( text === '' ) {
					// Don't produce an empty text node or an empty paragraph
					break;
				}
				// HACK: strip implied leading and trailing newlines in <p> tags
				// Workaround for a Parsoid bug
				/*
				 * Leading newlines:
				 * If the previous sibling is a paragraph, do not strip leading newlines
				 * If there is no previous sibling, do not strip leading newlines
				 * Otherwise, strip 1 leading newline
				 *
				 * Trailing newlines:
				 * If the next sibling is a paragraph, strip 2 trailing newlines
				 * If there is no next sibling, do not strip trailing newlines
				 * Otherwise, strip 1 trailing newline
				 */
				contentNode = childDomElement.parentNode;
				if ( contentNode.nodeName.toLowerCase() === 'p' ) {
					if (
						contentNode.previousSibling &&
						contentNode.previousSibling.nodeName.toLowerCase() !== 'p' &&
						text.charAt( 0 ) === '\n'
					) {
						text = text.substr( 1 );
					}
					if ( contentNode.nextSibling ) {
						// Strip one trailing newline
						if ( text.charAt( text.length - 1 ) === "\n" ) {
							text = text.substr( 0, text.length - 1 );
						}
						if ( contentNode.nextSibling.nodeName.toLowerCase() === 'p' ) {
							// Strip another one
							if ( text.charAt( text.length - 1 ) === "\n" ) {
								text = text.substr( 0, text.length - 1 );
							}
						}
					}
				}
				if ( !branchIsContent ) {
					// If it's bare content, strip leading and trailing newlines
					text = text.replace( /^\n+/, '' ).replace( /\n+$/, '' );
					if ( text === '' ) {
						// Don't produce an empty text node
						break;
					}
				}
				// Start auto-wrapping of bare content
				if ( !wrapping && !alreadyWrapped && !branchIsContent ) {
					data.push( { 'type': 'paragraph' } );
					wrapping = true;
				}
				// Annotate the text and output it
				data = data.concat(
					ve.dm.Converter.getDataContentFromText( text, annotations )
				);
				break;
			case Node.COMMENT_NODE:
				// TODO: Preserve comments by inserting them into the linear model too
				break;
		}
	}
	// End auto-wrapping of bare content
	if ( wrapping ) {
		data.push( { 'type': '/paragraph' } );
	}
	// If we're closing a node that doesn't have any children, but could contain a paragraph,
	// add a paragraph. This prevents things like empty list items
	childTypes = ve.dm.nodeFactory.getChildNodeTypes( branchType );
	if ( branchType !== 'paragraph' && dataElement && data[data.length - 1] === dataElement &&
		!wrapping && !ve.dm.nodeFactory.canNodeContainContent( branchType ) &&
		!ve.dm.nodeFactory.isNodeContent( branchType ) &&
		( childTypes === null || $.inArray( 'paragraph', childTypes ) !== -1 )
	) {
		data.push( { 'type': 'paragraph' } );
		data.push( { 'type': '/paragraph' } );
	}
	// Close element
	if ( dataElement ) {
		data.push( { 'type': '/' + dataElement.type } );
	}
	// Don't return an empty document. This only applies to the outermost call: a recursive call
	// for an empty annotation element directly under the document (e.g. <b></b>) also has an
	// empty result and 'document' as its branch type, but its caller has already opened an
	// auto-wrapping paragraph, so adding one here would nest a paragraph inside a paragraph.
	if ( branchType === 'document' && data.length === 0 && !alreadyWrapped ) {
		return [{ 'type': 'paragraph' }, { 'type': '/paragraph' }];
	}
	return data;
};
/**
 * Convert linear model data to an HTML DOM
 *
 * Builds the tree in a single pass over the data, keeping a cursor (domElement) on the DOM node
 * currently being filled:
 * - runs of plain characters become one text node
 * - runs of annotated characters and annotated content nodes are wrapped in annotation elements,
 *   which are opened and closed as the set of annotations changes from item to item
 * - alien nodes are re-created from their stored HTML
 * - other opening elements are created and descended into, closing elements ascend again
 * Afterwards the tree is post-processed to work around newline handling bugs in Parsoid.
 *
 * @method
 * @param {Array} data Linear model data
 * @returns {HTMLElement} Wrapper div containing the resulting HTML
 */
ve.dm.Converter.prototype.getDomFromData = function( data ) {
	var text, i, annotations, hash, annotationElement, done, dataElement, wrapper, childDomElement,
		container = document.createElement( 'div' ),
		domElement = container,
		annotationStack = {}; // { hash: DOMnode }

	// Whether a linear model item is an annotated character or an annotated content node.
	// Returns false for undefined, so scanning may safely look one item past the end of data.
	function isAnnotatedContent( item ) {
		return ve.isArray( item ) ||
			(
				item !== undefined &&
				item.annotations !== undefined &&
				ve.dm.nodeFactory.isNodeContent( item.type )
			);
	}

	function fixupText( text, node ) {
		// HACK reintroduce newlines needed to make Parsoid not freak out
		// This reverses the newline stripping done in getDataFromDom()
		/*
		 * Leading newlines:
		 * If the previous sibling is a heading, add 1 leading newline
		 * Otherwise, do not add any leading newlines
		 *
		 * Trailing newlines:
		 * If the next sibling is a paragraph, add 2 trailing newlines
		 * If there is no next sibling, do not add any trailing newlines
		 * Otherwise, add 1 trailing newline
		 */
		if ( node.parentNode.nodeName.toLowerCase() === 'p' ) {
			if (
				node.parentNode.previousSibling &&
				node.parentNode.previousSibling.nodeName.toLowerCase().match( /h\d/ ) &&
				!node.previousSibling
			) {
				text = "\n" + text;
			}
			if ( node.parentNode.nextSibling && !node.nextSibling ) {
				// Add one trailing newline
				text += "\n";
				if ( node.parentNode.nextSibling.nodeName.toLowerCase() === 'p' ) {
					// Add another one
					text += "\n";
				}
			}
		}
		return text;
	}

	for ( i = 0; i < data.length; i++ ) {
		if ( typeof data[i] === 'string' ) {
			// Text
			text = '';
			// Continue forward as far as the plain text goes
			while ( typeof data[i] === 'string' ) {
				text += data[i];
				i++;
			}
			// i points to the first non-text thing, go back one so we don't skip this later
			i--;
			// Add text
			domElement.appendChild( document.createTextNode( text ) );
		} else if ( isAnnotatedContent( data[i] ) ) {
			// Annotated text
			text = '';
			while ( isAnnotatedContent( data[i] ) ) {
				// Annotated nodes carry their annotations in a property, annotated characters in
				// the second item of the [character, annotations] pair
				annotations = data[i].annotations || data[i][1];
				// Check for closed annotations
				for ( hash in annotationStack ) {
					if ( !( hash in annotations ) ) {
						// It's closed
						// Traverse up until we hit the node we need to close, and then
						// traverse up one more time to close that node
						done = false;
						while ( !done ) {
							done = domElement === annotationStack[hash];
							// Remove the annotation from the stack
							delete annotationStack[domElement.veAnnotationHash];
							// Remove the temporary veAnnotationHash property
							delete domElement.veAnnotationHash;
							// Add text if needed
							if ( text.length > 0 ) {
								domElement.appendChild( document.createTextNode( text ) );
								text = '';
							}
							// Traverse up
							domElement = domElement.parentNode;
						}
					}
				}
				// Check for opened annotations
				for ( hash in annotations ) {
					if ( !( hash in annotationStack ) ) {
						// It's opened
						annotationElement = this.getDomElementFromDataAnnotation( annotations[hash] );
						// Temporary property, will remove this when closing the annotation
						annotationElement.veAnnotationHash = hash;
						// Add to the annotation stack
						annotationStack[hash] = annotationElement;
						// Add text if needed
						if ( text.length > 0 ) {
							domElement.appendChild( document.createTextNode( text ) );
							text = '';
						}
						// Attach new node and descend into it
						domElement.appendChild( annotationElement );
						domElement = annotationElement;
					}
				}
				if ( data[i].annotations === undefined ) {
					// Annotated character
					text += data[i][0];
				} else {
					// Annotated content node
					// Add text if needed
					if ( text.length > 0 ) {
						domElement.appendChild( document.createTextNode( text ) );
						text = '';
					}
					// Insert the element
					domElement.appendChild( this.getDomElementFromDataElement( data[i] ) );
					// Increment i once more so we skip over the closing as well
					i++;
				}
				i++;
			}
			// We're now at the first non-annotated thing, go back one so we don't skip this later
			i--;
			// Add any gathered text
			if ( text.length > 0 ) {
				domElement.appendChild( document.createTextNode( text ) );
				text = '';
			}
			// Close any remaining annotation nodes
			while ( domElement.veAnnotationHash !== undefined ) {
				delete annotationStack[domElement.veAnnotationHash];
				delete domElement.veAnnotationHash;
				domElement = domElement.parentNode;
			}
		} else if ( data[i].type !== undefined ) {
			dataElement = data[i];
			// Element
			if ( dataElement.type === 'alienBlock' || dataElement.type === 'alienInline' ) {
				// Create nodes from source
				wrapper = document.createElement( 'div' );
				wrapper.innerHTML = dataElement.attributes.html;
				// Add element - adds all child elements, but there really should only be 1
				while ( wrapper.firstChild ) {
					domElement.appendChild( wrapper.firstChild );
				}
				// Make sure the alien closing is skipped
				i++;
			} else if ( dataElement.type.charAt( 0 ) === '/' ) {
				// Ascend to parent node
				domElement = domElement.parentNode;
			} else {
				// Create node from data
				childDomElement = this.getDomElementFromDataElement( dataElement );
				// Add element
				domElement.appendChild( childDomElement );
				// Descend into child node
				domElement = childDomElement;
			}
		}
	}

	// HACK: do postprocessing on the data to work around bugs in Parsoid concerning paragraphs
	// inside list items
	$( container ).find( 'li, dd, dt' ).each( function() {
		var datamw,
			$sublists = $(this).children( 'ul, ol, dl' ),
			$firstChild = $(this.firstChild),
			$lastChild = $(this.lastChild);
		if ( $firstChild.is( 'p' ) ) {
			// Unwrap the first paragraph, unless it has stx=html
			datamw = $.parseJSON( $firstChild.attr( 'data-rt' ) ) || {};
			if ( datamw.stx !== 'html' ) {
				$firstChild.replaceWith( $firstChild.contents() );
			}
		}
		// Append a newline to the end of the <li> , provided its last child is not a list
		if ( $lastChild.is( ':not(ul,ol)' ) ) {
			$(this).append( "\n" );
		}
		// Append a newline before every sublist that is preceded by something
		$sublists.each( function() {
			var previous = this.previousSibling;
			if ( previous ) {
				// Text nodes are recognised by node type: their nodeName is '#text', so a
				// comparison against 'text' never matches
				if ( previous.nodeType === 3 ) {
					previous.data += "\n";
				} else {
					this.parentNode.insertBefore( document.createTextNode( "\n" ), this );
				}
			}
		});
	});

	// HACK more postprocessing, this time to add newlines to paragraphs so Parsoid doesn't freak out
	$( container )
		// Get all text nodes
		.find( '*' )
		.contents()
		.filter( function() {
			// Text nodes only
			return this.nodeType === 3 &&
				// Exclude text nodes within lists
				$( this.parentNode ).closest( 'li, dd, dt' ).length === 0;
		} )
		.each( function() {
			this.data = fixupText( this.data, this );
		} );

	// And add newlines after headings too
	$( container ).find( 'h1, h2, h3, h4, h5, h6' ).each( function() {
		// If there is no next sibling, we don't need to add a newline
		// If the next sibling is a paragraph, fixupText() has taken care of it
		// Otherwise, add a newline after the heading
		if ( this.nextSibling && this.nextSibling.nodeName.toLowerCase() !== 'p' ) {
			this.parentNode.insertBefore( document.createTextNode( "\n" ), this.nextSibling );
		}
		// If the previous sibling exists and is a pre, we need to add a newline before
		if ( this.previousSibling && this.previousSibling.nodeName.toLowerCase() === 'pre' ) {
			this.parentNode.insertBefore( document.createTextNode( "\n" ), this );
		}
	} );
	return container;
};
/* Initialization */
// Create a converter bound to ve.dm.nodeFactory and ve.dm.annotationFactory and expose it as
// ve.dm.converter. The constructor subscribes the instance to 'register' events on both
// factories.
ve.dm.converter = new ve.dm.Converter( ve.dm.nodeFactory, ve.dm.annotationFactory );