mediawiki-extensions-Visual.../modules/ve/dm/ve.dm.Converter.js

628 lines
21 KiB
JavaScript
Raw Normal View History

/**
* VisualEditor data model Converter class.
*
* @copyright 2011-2012 VisualEditor Team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
/**
* Converter between HTML DOM and VisualEditor linear data.
*
* @class
* @constructor
* @param {Object} options Conversion options
*/
ve.dm.Converter = function ( nodeFactory, annotationFactory ) {
// Properties
this.nodeFactory = nodeFactory;
this.annotationFactory = annotationFactory;
this.elements = { 'toDomElement': {}, 'toDataElement': {}, 'dataElementTypes': {} };
this.annotations = { 'toDomElement': {}, 'toDataAnnotation': {} };
// Events
this.nodeFactory.addListenerMethod( this, 'register', 'onNodeRegister' );
this.annotationFactory.addListenerMethod( this, 'register', 'onAnnotationRegister' );
};
/* Static Methods */
/**
* Get linear model data from a string optionally applying annotations
*
* @param {String} text Plain text to convert
* @param {Array} [annotations] Array of annotation objects to apply
* @returns {Array} Linear model data, one element per character
*/
ve.dm.Converter.getDataContentFromText = function ( text, annotations ) {
var characters = text.split( '' ),
annotationMap = {},
i;
if ( !annotations || annotations.length === 0 ) {
return characters;
}
// Build annotation map
for ( i = 0; i < annotations.length; i++ ) {
if ( annotations[i].data && ve.isEmptyObject( annotations[i].data ) ) {
// Cleanup empty data property
delete annotations[i].data;
}
annotationMap[ve.getHash( annotations[i] )] = annotations[i];
}
// Apply annotations to characters
for ( i = 0; i < characters.length; i++ ) {
// Make a shallow copy of the annotationMap object, otherwise adding an annotation to one
// character automatically adds it to all of others as well, annotations should be treated
// as immutable, so it's OK to share references, but annotation maps are not immutable, so
// its not safe to share references - each annotated character needs its own map
characters[i] = [characters[i], ve.extendObject( {}, annotationMap )];
}
return characters;
};
/* Methods */
/**
* Responds to register events from the node factory.
*
* If a node is special; such as document, alienInline, alienBlock and text; its {converters}
* property should be set to null, as to distinguish it from a new node type that someone has simply
* forgotten to implement converters for.
*
* @method
* @param {String} type Node type
* @param {Function} constructor Node constructor
* @throws 'Missing conversion data in node implementation of {type}'
*/
ve.dm.Converter.prototype.onNodeRegister = function ( dataElementType, constructor ) {
if ( constructor.converters === undefined ) {
throw new Error( 'Missing conversion data in node implementation of ' + dataElementType );
} else if ( constructor.converters !== null ) {
var i,
domElementTypes = constructor.converters.domElementTypes,
toDomElement = constructor.converters.toDomElement,
toDataElement = constructor.converters.toDataElement;
// Registration
this.elements.toDomElement[dataElementType] = toDomElement;
for ( i = 0; i < domElementTypes.length; i++ ) {
this.elements.toDataElement[domElementTypes[i]] = toDataElement;
this.elements.dataElementTypes[domElementTypes[i]] = dataElementType;
}
}
};
/**
* Responds to register events from the annotation factory.
*
* @method
* @param {String} type Base annotation type
* @param {Function} constructor Annotation constructor
* @throws 'Missing conversion data in annotation implementation of {type}'
*/
ve.dm.Converter.prototype.onAnnotationRegister = function ( dataElementType, constructor ) {
if ( constructor.converters === undefined ) {
throw new Error( 'Missing conversion data in annotation implementation of ' + dataElementType );
} else if ( constructor.converters !== null ) {
var i,
domElementTypes = constructor.converters.domElementTypes,
toDomElement = constructor.converters.toDomElement,
toDataAnnotation = constructor.converters.toDataAnnotation;
// Registration
this.annotations.toDomElement[dataElementType] = toDomElement;
for ( i = 0; i < domElementTypes.length; i++ ) {
this.annotations.toDataAnnotation[domElementTypes[i]] = toDataAnnotation;
}
}
};
/**
* Get the DOM element for a given linear model element.
*
* This invokes the toDomElement function registered for the element type.
* NOTE: alienBlock and alienInline elements are not supported, if you pass them this function
* will return false. (Opposite of District 9: no aliens allowed.)
*
* @method
* @param {Object} dataElement Linear model element
* @returns {HTMLElement|false} DOM element, or false if this element cannot be converted
*/
ve.dm.Converter.prototype.getDomElementFromDataElement = function ( dataElement ) {
var key, domElement, dataElementAttributes,
dataElementType = dataElement.type;
if (
// Aliens
dataElementType === 'alienInline' || dataElementType === 'alienBlock' ||
// Unsupported elements
!( dataElementType in this.elements.toDomElement)
) {
return false;
}
domElement = this.elements.toDomElement[dataElementType]( dataElementType, dataElement );
dataElementAttributes = dataElement.attributes;
if ( dataElementAttributes ) {
for ( key in dataElementAttributes ) {
// Only include 'html/*' attributes and strip the 'html/' from the beginning of the name
if ( key.indexOf( 'html/' ) === 0 ) {
domElement.setAttribute( key.substr( 5 ), dataElementAttributes[key] );
}
}
}
return domElement;
};
/**
* Get the linear model data element for a given DOM element.
*
* This invokes the toDataElement function registered for the element type, after checking that
* there is no data-mw-gc attribute.
*
* @method
* @param {HTMLElement} domElement DOM element
* @returns {Object|false} Linear model element, or false if this node cannot be converted
*/
ve.dm.Converter.prototype.getDataElementFromDomElement = function ( domElement ) {
var dataElement, domElementAttributes, dataElementAttributes, domElementAttribute, i,
domElementType = domElement.nodeName.toLowerCase();
if (
// Generated elements
domElement.hasAttribute( 'data-mw-gc' ) ||
// Unsupported elements
!( domElementType in this.elements.toDataElement )
) {
return false;
}
dataElement = this.elements.toDataElement[domElementType]( domElementType, domElement );
domElementAttributes = domElement.attributes;
if ( domElementAttributes.length ) {
dataElementAttributes = dataElement.attributes = dataElement.attributes || {};
// Inlcude all attributes and prepend 'html/' to each attribute name
for ( i = 0; i < domElementAttributes.length; i++ ) {
domElementAttribute = domElementAttributes[i];
dataElementAttributes['html/' + domElementAttribute.name] = domElementAttribute.value;
}
}
return dataElement;
};
/**
* Check if an HTML DOM node represents an annotation, and if so, build an annotation object for it.
*
* @example Annotation Object
* { 'type': 'type', data: { 'key': 'value', ... } }
*
* @param {HTMLElement} domElement HTML DOM node
* @returns {Object|false} Annotation object, or false if this node is not an annotation
*/
ve.dm.Converter.prototype.getDataAnnotationFromDomElement = function ( domElement ) {
var domElementType = domElement.nodeName.toLowerCase(),
toDataAnnotation = this.annotations.toDataAnnotation[domElementType];
if ( typeof toDataAnnotation === 'function' ) {
return toDataAnnotation( domElementType, domElement );
}
return false;
};
/**
* Build an HTML DOM node for a linear model annotation.
*
* @method
* @param {Object} dataAnnotation Annotation object
* @returns {HTMLElement|false} HTML DOM node, or false if this annotation is not known
*/
ve.dm.Converter.prototype.getDomElementFromDataAnnotation = function ( dataAnnotation ) {
var split = dataAnnotation.type.split( '/' ),
baseType = split[0],
subType = split.slice( 1 ).join( '/' ),
toDomElement = this.annotations.toDomElement[baseType];
if ( typeof toDomElement === 'function' ) {
return toDomElement( subType, dataAnnotation );
}
return false;
};
/**
* Convert an HTML DOM tree to a linear model.
*
* Do not use the annotations, dataElement and path parameters, they're used for internal
* recursion only.
*
* @method
* @param {HTMLElement} domElement Wrapper div containing the HTML to convert
* @param {Array} [annotations] Array of annotations (objects) to apply to the generated data
* @param {Object} [dataElement] Data element to wrap the returned data in
* @param {Array} [path] Array of linear model element types
* @returns {Array} Linear model data
*/
ve.dm.Converter.prototype.getDataFromDom = function ( domElement, annotations, dataElement, path, alreadyWrapped ) {
function createAlien( domElement, isInline ) {
var type = isInline ? 'alienInline' : 'alienBlock';
return [
{
'type': type,
'attributes': {
'html': $( '<div>' ).append( $( domElement ).clone() ).html()
}
},
{ 'type': '/' + type }
];
}
// Fallback to defaults
annotations = annotations || [];
path = path || ['document'];
2012-08-10 21:09:04 +00:00
var i, childDomElement, annotation, childDataElement, text, childTypes, matches, paragraph,
wrapperElement = dataElement,
data = [],
branchType = path[path.length - 1],
branchIsContent = ve.dm.nodeFactory.canNodeContainContent( branchType ),
wrapping = false;
// Open element
if ( dataElement ) {
data.push( dataElement );
}
// Add contents
for ( i = 0; i < domElement.childNodes.length; i++ ) {
childDomElement = domElement.childNodes[i];
switch ( childDomElement.nodeType ) {
case Node.ELEMENT_NODE:
// Detect generated content and wrap it in an alien node
if ( childDomElement.hasAttribute( 'data-mw-gc' ) ) {
// FIXME Parsoid outputs RDFa now, address this in API rewrite
data = data.concat( createAlien( childDomElement, branchIsContent ) );
break;
}
// Detect and handle annotated content
annotation = this.getDataAnnotationFromDomElement( childDomElement );
if ( annotation ) {
// Start auto-wrapping of bare content
if ( !wrapping && !alreadyWrapped && !branchIsContent ) {
// Mark this paragraph as having been generated by
// us, so we can strip it on the way out
data.push( {
'type': 'paragraph',
'internal': { 'generated': 'wrapper' }
} );
wrapping = true;
}
// Append child element data
data = data.concat(
this.getDataFromDom(
childDomElement, annotations.concat( annotation ), undefined, path, wrapping || alreadyWrapped
)
);
break;
}
// End auto-wrapping of bare content
if ( wrapping ) {
data.push( { 'type': '/paragraph' } );
wrapping = false;
}
// Append child element data
childDataElement = this.getDataElementFromDomElement( childDomElement );
if ( childDataElement ) {
data = data.concat(
this.getDataFromDom(
childDomElement,
[],
childDataElement,
path.concat( childDataElement.type ),
wrapping || alreadyWrapped
)
);
break;
}
// We don't know what this is, fall back to alien
data = data.concat( createAlien( childDomElement, branchIsContent ) );
break;
case Node.TEXT_NODE:
// HACK: strip trailing newline in <li> tags. Workaround for a Parsoid bug
2012-08-10 21:09:04 +00:00
// TODO kill this in favor of the fringe whitespace preservation
// code below
text = childDomElement.data;
if ( domElement.nodeName.toLowerCase() === 'li' ) {
text = text.replace( /\n$/, '' );
}
if ( !branchIsContent ) {
// If it's bare content, strip leading and trailing newlines
// FIXME these newlines should be preserved somehow
text = text.replace( /^\n+/, '' ).replace( /\n+$/, '' );
}
if ( text === '' ) {
// Don't produce an empty text node or an empty paragraph
break;
}
// Start auto-wrapping of bare content
if ( !wrapping && !alreadyWrapped && !branchIsContent ) {
// Mark this paragraph as having been generated by
// us, so we can strip it on the way out
paragraph = {
'type': 'paragraph',
'internal': { 'generated': 'wrapper' }
};
2012-08-10 21:09:04 +00:00
data.push( paragraph );
wrapping = true;
2012-08-10 21:09:04 +00:00
wrapperElement = paragraph;
}
// Strip leading and trailing whitespace
// (but only in non-annotation nodes)
// and store it so it can be restored later.
// whitespace = [ outerPre, innerPre, innerPost, outerPost ]
// <tag> text </tag> <nextTag>
// ^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^ ^^^^^^^^^^
// outerPre innerPre innerPost outerPost
2012-08-10 21:09:04 +00:00
if ( annotations.length === 0 && i === 0 && wrapperElement ) {
// Strip leading whitespace from the first child
matches = text.match( /^\s+/ );
if ( matches && matches[0] !== '' ) {
if ( !wrapperElement.internal ) {
wrapperElement.internal = {};
2012-08-10 21:09:04 +00:00
}
if ( !wrapperElement.internal.whitespace ) {
wrapperElement.internal.whitespace = [];
}
wrapperElement.internal.whitespace[1] = matches[0];
2012-08-10 21:09:04 +00:00
text = text.substring( matches[0].length );
}
}
if (
annotations.length === 0 &&
i === domElement.childNodes.length - 1 &&
wrapperElement
) {
// Strip trailing whitespace from the last child
matches = text.match( /\s+$/ );
if ( matches && matches[0] !== '' ) {
if ( !wrapperElement.internal ) {
wrapperElement.internal = {};
}
if ( !wrapperElement.internal.whitespace ) {
wrapperElement.internal.whitespace = [];
2012-08-10 21:09:04 +00:00
}
wrapperElement.internal.whitespace[2] = matches[0];
2012-08-10 21:09:04 +00:00
text = text.substring( 0,
text.length - matches[0].length );
}
}
// Annotate the text and output it
data = data.concat(
ve.dm.Converter.getDataContentFromText( text, annotations )
);
break;
case Node.COMMENT_NODE:
// TODO: Preserve comments by inserting them into the linear model too
// Could use placeholders for this too, although they'd need to be
// inline in certain cases
break;
}
}
// End auto-wrapping of bare content
if ( wrapping ) {
data.push( { 'type': '/paragraph' } );
}
// If we're closing a node that doesn't have any children, but could contain a paragraph,
// add a paragraph. This prevents things like empty list items
childTypes = ve.dm.nodeFactory.getChildNodeTypes( branchType );
if ( branchType !== 'paragraph' && dataElement && data[data.length - 1] === dataElement &&
!wrapping && !ve.dm.nodeFactory.canNodeContainContent( branchType ) &&
!ve.dm.nodeFactory.isNodeContent( branchType ) &&
Refactor ve.js utilities and improve documentation Refactor: * ve.indexOf Renamed from ve.inArray. This was named after the jQuery method which in turn has a longer story about why it is so unfortunately named. It doesn't return a boolean, but an index. Hence the native method being called indexOf as well. * ve.bind Renamed from ve.proxy. I considered making it use Function.prototype.bind if available. As it performs better than $.proxy (which doesn't use to the native bind if available). However since bind needs to be bound itself in order to use it detached, it turns out with the "call()" and "bind()" it is slower than the $.proxy shim: http://jsperf.com/function-bind-shim-perf It would've been like this: ve.bind = Function.prototype.bind ? Function.prototype.call.bind( Function.prototype.bind ) : $.proxy; But instead sticking to ve.bind = $.proxy; * ve.extendObject Documented the parts of jQuery.extend that we use. This makes it easier to replace in the future. Documentation: * Added function documentation blocks. * Added annotations to functions that we will be able to remove in the future in favour of the native methods. With "@until + when/how". In this case "ES5". Meaning, whenever we drop support for browsers that don't support ES5. Although in the developer community ES5 is still fairly fresh, browsers have been aware for it long enough that thee moment we're able to drop it may be sooner than we think. The only blocker so far is IE8. The rest of the browsers have had it long enough that the traffic we need to support of non-IE supports it. Misc.: * Removed 'node: true' from .jshintrc since Parsoid is no longer in this repo and thus no more nodejs files. - This unraveled two lint errors: Usage of 'module' and 'console'. (both were considered 'safe globals' due to nodejs, but not in browser code). * Replaced usage (before renaming): - $.inArray -> ve.inArray - Function.prototype.bind -> ve.proxy - Array.isArray -> ve.isArray - [].indexOf -> ve.inArray - $.fn.bind/live/delegate/unbind/die/delegate -> $.fn.on/off Change-Id: Idcf1fa6a685b6ed3d7c99ffe17bd57a7bc586a2c
2012-08-11 08:14:56 +00:00
( childTypes === null || ve.indexOf( 'paragraph', childTypes ) !== -1 )
) {
data.push( { 'type': 'paragraph', 'internal': { 'generated': 'wrapper' } } );
data.push( { 'type': '/paragraph' } );
}
// Close element
if ( dataElement ) {
data.push( { 'type': '/' + dataElement.type } );
}
// Don't return an empty document
if ( branchType === 'document' && data.length === 0 ) {
return [
{ 'type': 'paragraph', 'internal': { 'generated': 'wrapper' } },
{ 'type': '/paragraph' }
];
}
return data;
};
/**
* Convert linear model data to an HTML DOM
*
* @method
* @param {Array} data Linear model data
* @returns {HTMLElement} Wrapper div containing the resulting HTML
*/
ve.dm.Converter.prototype.getDomFromData = function ( data ) {
var text, i, j, annotations, hash, annotationElement, done, dataElement, wrapper,
childDomElement, pre, post, parentDomElement,
container = document.createElement( 'div' ),
domElement = container,
annotationStack = {}; // { hash: DOMnode }
for ( i = 0; i < data.length; i++ ) {
if ( typeof data[i] === 'string' ) {
// Text
text = '';
// Continue forward as far as the plain text goes
while ( typeof data[i] === 'string' ) {
text += data[i];
i++;
}
// i points to the first non-text thing, go back one so we don't skip this later
i--;
// Add text
domElement.appendChild( document.createTextNode( text ) );
} else if (
ve.isArray( data[i] ) ||
(
data[i].annotations !== undefined &&
ve.dm.nodeFactory.isNodeContent( data[i].type )
)
) {
// Annotated text
text = '';
while (
ve.isArray( data[i] ) ||
(
data[i].annotations !== undefined &&
ve.dm.nodeFactory.isNodeContent( data[i].type )
)
) {
annotations = data[i].annotations || data[i][1];
// Check for closed annotations
for ( hash in annotationStack ) {
if ( !( hash in annotations ) ) {
// It's closed
// Traverse up until we hit the node we need to close, and then
// traverse up one more time to close that node
done = false;
while ( !done ) {
done = domElement === annotationStack[hash];
// Remove the annotation from the stack
delete annotationStack[domElement.veAnnotationHash];
// Remove the temporary veAnnotationHash property
delete domElement.veAnnotationHash;
// Add text if needed
if ( text.length > 0 ) {
domElement.appendChild( document.createTextNode( text ) );
text = '';
}
// Traverse up
domElement = domElement.parentNode;
}
}
}
// Check for opened annotations
for ( hash in annotations ) {
if ( !( hash in annotationStack ) ) {
// It's opened
annotationElement = this.getDomElementFromDataAnnotation( annotations[hash] );
// Temporary property, will remove this when closing the annotation
annotationElement.veAnnotationHash = hash;
// Add to the annotation stack
annotationStack[hash] = annotationElement;
// Add text if needed
if ( text.length > 0 ) {
domElement.appendChild( document.createTextNode( text ) );
text = '';
}
// Attach new node and descend into it
domElement.appendChild( annotationElement );
domElement = annotationElement;
}
}
if ( data[i].annotations === undefined ) {
text += data[i][0];
} else {
// Add text if needed
if ( text.length > 0 ) {
domElement.appendChild( document.createTextNode( text ) );
text = '';
}
// Insert the element
domElement.appendChild( this.getDomElementFromDataElement( data[i] ) );
// Increment i once more so we skip over the closing as well
i++;
}
i++;
}
// We're now at the first non-annotated thing, go back one so we don't skip this later
i--;
// Add any gathered text
if ( text.length > 0 ) {
domElement.appendChild( document.createTextNode( text ) );
text = '';
}
// Close any remaining annotation nodes
while ( domElement.veAnnotationHash !== undefined ) {
delete annotationStack[domElement.veAnnotationHash];
delete domElement.veAnnotationHash;
domElement = domElement.parentNode;
}
} else if ( data[i].type !== undefined ) {
dataElement = data[i];
// Element
if ( dataElement.type === 'alienBlock' || dataElement.type === 'alienInline' ) {
// Create nodes from source
wrapper = document.createElement( 'div' );
wrapper.innerHTML = dataElement.attributes.html;
// Add element - adds all child elements, but there really should only be 1
while ( wrapper.firstChild ) {
domElement.appendChild( wrapper.firstChild );
}
// Make sure the alien closing is skipped
i++;
} else if ( dataElement.type.charAt( 0 ) === '/' ) {
// Process inner whitespace
// whitespace = [ outerPre, innerPre, innerPost, outerPost ]
if ( domElement.veInternal && domElement.veInternal.whitespace ) {
pre = domElement.veInternal.whitespace[1];
2012-08-10 21:09:04 +00:00
if ( pre ) {
if ( domElement.firstChild.nodeType === 3 ) {
// First child is a TextNode, prepend to it
domElement.firstChild.insertData( 0, pre );
} else {
// Prepend a TextNode
domElement.insertBefore(
document.createTextNode( pre ),
domElement.firstChild
);
}
}
post = domElement.veInternal.whitespace[2];
2012-08-10 21:09:04 +00:00
if ( post ) {
if ( domElement.lastChild.nodeType === 3 ) {
// Last child is a TextNode, append to it
domElement.lastChild.appendData( post );
} else {
// Append a TextNode
domElement.appendChild(
document.createTextNode( post )
);
}
}
}
// If closing a generated wrapper node, unwrap it
// It would be nicer if we could avoid generating in the first
// place, but then remembering where we have to skip ascending
// to the parent would be tricky.
parentDomElement = domElement.parentNode;
if ( domElement.veInternal && domElement.veInternal.generated === 'wrapper' ) {
for ( j = 0; j < domElement.childNodes.length; j++ ) {
parentDomElement.insertBefore(
domElement.childNodes[j],
domElement
);
}
parentDomElement.removeChild( domElement );
}
// Clean up the internal data
delete domElement.veInternal;
// Ascend to parent node
domElement = parentDomElement;
} else {
// Create node from data
childDomElement = this.getDomElementFromDataElement( dataElement );
// Add reference to internal data
if ( dataElement.internal ) {
childDomElement.veInternal = dataElement.internal;
2012-08-10 21:09:04 +00:00
}
// Add element
domElement.appendChild( childDomElement );
// Descend into child node
domElement = childDomElement;
}
}
}
return container;
};
/* Initialization */
ve.dm.converter = new ve.dm.Converter( ve.dm.nodeFactory, ve.dm.annotationFactory );