/** * VisualEditor data model Converter class. * * @copyright 2011-2012 VisualEditor Team and others; see AUTHORS.txt * @license The MIT License (MIT); see LICENSE.txt */ /** * Converter between HTML DOM and VisualEditor linear data. * * @class * @constructor * @param {Object} options Conversion options */ ve.dm.Converter = function ( nodeFactory, annotationFactory ) { // Properties this.nodeFactory = nodeFactory; this.annotationFactory = annotationFactory; this.elements = { 'toDomElement': {}, 'toDataElement': {}, 'dataElementTypes': {} }; this.annotations = { 'toDomElement': {}, 'toDataAnnotation': {} }; // Events this.nodeFactory.addListenerMethod( this, 'register', 'onNodeRegister' ); this.annotationFactory.addListenerMethod( this, 'register', 'onAnnotationRegister' ); }; /* Static Methods */ /** * Get linear model data from a string optionally applying annotations * * @param {String} text Plain text to convert * @param {Array} [annotations] Array of annotation objects to apply * @returns {Array} Linear model data, one element per character */ ve.dm.Converter.getDataContentFromText = function ( text, annotations ) { var characters = text.split( '' ), annotationMap = {}, i; if ( !annotations || annotations.length === 0 ) { return characters; } // Build annotation map for ( i = 0; i < annotations.length; i++ ) { if ( annotations[i].data && ve.isEmptyObject( annotations[i].data ) ) { // Cleanup empty data property delete annotations[i].data; } annotationMap[ve.getHash( annotations[i] )] = annotations[i]; } // Apply annotations to characters for ( i = 0; i < characters.length; i++ ) { // Make a shallow copy of the annotationMap object, otherwise adding an annotation to one // character automatically adds it to all of others as well, annotations should be treated // as immutable, so it's OK to share references, but annotation maps are not immutable, so // its not safe to share references - each annotated character needs its own map characters[i] = [characters[i], ve.extendObject( {}, annotationMap )]; } return characters; }; /* Methods */ /** * Responds to register events from the node factory. * * If a node is special; such as document, alienInline, alienBlock and text; its {converters} * property should be set to null, as to distinguish it from a new node type that someone has simply * forgotten to implement converters for. * * @method * @param {String} type Node type * @param {Function} constructor Node constructor * @throws 'Missing conversion data in node implementation of {type}' */ ve.dm.Converter.prototype.onNodeRegister = function ( dataElementType, constructor ) { if ( constructor.converters === undefined ) { throw new Error( 'Missing conversion data in node implementation of ' + dataElementType ); } else if ( constructor.converters !== null ) { var i, domElementTypes = constructor.converters.domElementTypes, toDomElement = constructor.converters.toDomElement, toDataElement = constructor.converters.toDataElement; // Registration this.elements.toDomElement[dataElementType] = toDomElement; for ( i = 0; i < domElementTypes.length; i++ ) { this.elements.toDataElement[domElementTypes[i]] = toDataElement; this.elements.dataElementTypes[domElementTypes[i]] = dataElementType; } } }; /** * Responds to register events from the annotation factory. * * @method * @param {String} type Base annotation type * @param {Function} constructor Annotation constructor * @throws 'Missing conversion data in annotation implementation of {type}' */ ve.dm.Converter.prototype.onAnnotationRegister = function ( dataElementType, constructor ) { if ( constructor.converters === undefined ) { throw new Error( 'Missing conversion data in annotation implementation of ' + dataElementType ); } else if ( constructor.converters !== null ) { var i, domElementTypes = constructor.converters.domElementTypes, toDomElement = constructor.converters.toDomElement, toDataAnnotation = constructor.converters.toDataAnnotation; // Registration this.annotations.toDomElement[dataElementType] = toDomElement; for ( i = 0; i < domElementTypes.length; i++ ) { this.annotations.toDataAnnotation[domElementTypes[i]] = toDataAnnotation; } } }; /** * Get the DOM element for a given linear model element. * * This invokes the toDomElement function registered for the element type. * NOTE: alienBlock and alienInline elements are not supported, if you pass them this function * will return false. (Opposite of District 9: no aliens allowed.) * * @method * @param {Object} dataElement Linear model element * @returns {HTMLElement|false} DOM element, or false if this element cannot be converted */ ve.dm.Converter.prototype.getDomElementFromDataElement = function ( dataElement ) { var key, domElement, dataElementAttributes, dataElementType = dataElement.type; if ( // Aliens dataElementType === 'alienInline' || dataElementType === 'alienBlock' || // Unsupported elements !( dataElementType in this.elements.toDomElement) ) { return false; } domElement = this.elements.toDomElement[dataElementType]( dataElementType, dataElement ); dataElementAttributes = dataElement.attributes; if ( dataElementAttributes ) { for ( key in dataElementAttributes ) { // Only include 'html/*' attributes and strip the 'html/' from the beginning of the name if ( key.indexOf( 'html/' ) === 0 ) { domElement.setAttribute( key.substr( 5 ), dataElementAttributes[key] ); } } } return domElement; }; /** * Get the linear model data element for a given DOM element. * * This invokes the toDataElement function registered for the element type, after checking that * there is no data-mw-gc attribute. * * @method * @param {HTMLElement} domElement DOM element * @returns {Object|false} Linear model element, or false if this node cannot be converted */ ve.dm.Converter.prototype.getDataElementFromDomElement = function ( domElement ) { var dataElement, domElementAttributes, dataElementAttributes, domElementAttribute, i, domElementType = domElement.nodeName.toLowerCase(); if ( // Generated elements domElement.hasAttribute( 'data-mw-gc' ) || // Unsupported elements !( domElementType in this.elements.toDataElement ) ) { return false; } dataElement = this.elements.toDataElement[domElementType]( domElementType, domElement ); domElementAttributes = domElement.attributes; if ( domElementAttributes.length ) { dataElementAttributes = dataElement.attributes = dataElement.attributes || {}; // Inlcude all attributes and prepend 'html/' to each attribute name for ( i = 0; i < domElementAttributes.length; i++ ) { domElementAttribute = domElementAttributes[i]; dataElementAttributes['html/' + domElementAttribute.name] = domElementAttribute.value; } } return dataElement; }; /** * Check if an HTML DOM node represents an annotation, and if so, build an annotation object for it. * * @example Annotation Object * { 'type': 'type', data: { 'key': 'value', ... } } * * @param {HTMLElement} domElement HTML DOM node * @returns {Object|false} Annotation object, or false if this node is not an annotation */ ve.dm.Converter.prototype.getDataAnnotationFromDomElement = function ( domElement ) { var domElementType = domElement.nodeName.toLowerCase(), toDataAnnotation = this.annotations.toDataAnnotation[domElementType]; if ( typeof toDataAnnotation === 'function' ) { return toDataAnnotation( domElementType, domElement ); } return false; }; /** * Build an HTML DOM node for a linear model annotation. * * @method * @param {Object} dataAnnotation Annotation object * @returns {HTMLElement|false} HTML DOM node, or false if this annotation is not known */ ve.dm.Converter.prototype.getDomElementFromDataAnnotation = function ( dataAnnotation ) { var split = dataAnnotation.type.split( '/' ), baseType = split[0], subType = split.slice( 1 ).join( '/' ), toDomElement = this.annotations.toDomElement[baseType]; if ( typeof toDomElement === 'function' ) { return toDomElement( subType, dataAnnotation ); } return false; }; /** * Convert an HTML DOM tree to a linear model. * * Do not use the annotations, dataElement and path parameters, they're used for internal * recursion only. * * @method * @param {HTMLElement} domElement Wrapper div containing the HTML to convert * @param {Array} [annotations] Array of annotations (objects) to apply to the generated data * @param {Object} [dataElement] Data element to wrap the returned data in * @param {Array} [path] Array of linear model element types * @returns {Array} Linear model data */ ve.dm.Converter.prototype.getDataFromDom = function ( domElement, annotations, dataElement, path, alreadyWrapped ) { function createAlien( domElement, isInline ) { var type = isInline ? 'alienInline' : 'alienBlock'; return [ { 'type': type, 'attributes': { 'html': $( '
' ).append( $( domElement ).clone() ).html() } }, { 'type': '/' + type } ]; } // Fallback to defaults annotations = annotations || []; path = path || ['document']; var i, childDomElement, annotation, childDataElement, text, childTypes, matches, paragraph, wrapperElement = dataElement, data = [], branchType = path[path.length - 1], branchIsContent = ve.dm.nodeFactory.canNodeContainContent( branchType ), wrapping = false; // Open element if ( dataElement ) { data.push( dataElement ); } // Add contents for ( i = 0; i < domElement.childNodes.length; i++ ) { childDomElement = domElement.childNodes[i]; switch ( childDomElement.nodeType ) { case Node.ELEMENT_NODE: // Detect generated content and wrap it in an alien node if ( childDomElement.hasAttribute( 'data-mw-gc' ) ) { // FIXME Parsoid outputs RDFa now, address this in API rewrite data = data.concat( createAlien( childDomElement, branchIsContent ) ); break; } // Detect and handle annotated content annotation = this.getDataAnnotationFromDomElement( childDomElement ); if ( annotation ) { // Start auto-wrapping of bare content if ( !wrapping && !alreadyWrapped && !branchIsContent ) { // Mark this paragraph as having been generated by // us, so we can strip it on the way out data.push( { 'type': 'paragraph', 'internal': { 'generated': 'wrapper' } } ); wrapping = true; } // Append child element data data = data.concat( this.getDataFromDom( childDomElement, annotations.concat( annotation ), undefined, path, wrapping || alreadyWrapped ) ); break; } // End auto-wrapping of bare content if ( wrapping ) { data.push( { 'type': '/paragraph' } ); wrapping = false; } // Append child element data childDataElement = this.getDataElementFromDomElement( childDomElement ); if ( childDataElement ) { data = data.concat( this.getDataFromDom( childDomElement, [], childDataElement, path.concat( childDataElement.type ), wrapping || alreadyWrapped ) ); break; } // We don't know what this is, fall back to alien data = data.concat( createAlien( childDomElement, branchIsContent ) ); break; case Node.TEXT_NODE: // HACK: strip trailing newline in
  • tags. Workaround for a Parsoid bug // TODO kill this in favor of the fringe whitespace preservation // code below text = childDomElement.data; if ( domElement.nodeName.toLowerCase() === 'li' ) { text = text.replace( /\n$/, '' ); } if ( !branchIsContent ) { // If it's bare content, strip leading and trailing newlines // FIXME these newlines should be preserved somehow text = text.replace( /^\n+/, '' ).replace( /\n+$/, '' ); } if ( text === '' ) { // Don't produce an empty text node or an empty paragraph break; } // Start auto-wrapping of bare content if ( !wrapping && !alreadyWrapped && !branchIsContent ) { // Mark this paragraph as having been generated by // us, so we can strip it on the way out paragraph = { 'type': 'paragraph', 'internal': { 'generated': 'wrapper' } }; data.push( paragraph ); wrapping = true; wrapperElement = paragraph; } // Strip leading and trailing whitespace // (but only in non-annotation nodes) // and store it so it can be restored later. // whitespace = [ outerPre, innerPre, innerPost, outerPost ] // text // ^^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^ ^^^^^^^^^^ // outerPre innerPre innerPost outerPost if ( annotations.length === 0 && i === 0 && wrapperElement ) { // Strip leading whitespace from the first child matches = text.match( /^\s+/ ); if ( matches && matches[0] !== '' ) { if ( !wrapperElement.internal ) { wrapperElement.internal = {}; } if ( !wrapperElement.internal.whitespace ) { wrapperElement.internal.whitespace = []; } wrapperElement.internal.whitespace[1] = matches[0]; text = text.substring( matches[0].length ); } } if ( annotations.length === 0 && i === domElement.childNodes.length - 1 && wrapperElement ) { // Strip trailing whitespace from the last child matches = text.match( /\s+$/ ); if ( matches && matches[0] !== '' ) { if ( !wrapperElement.internal ) { wrapperElement.internal = {}; } if ( !wrapperElement.internal.whitespace ) { wrapperElement.internal.whitespace = []; } wrapperElement.internal.whitespace[2] = matches[0]; text = text.substring( 0, text.length - matches[0].length ); } } // Annotate the text and output it data = data.concat( ve.dm.Converter.getDataContentFromText( text, annotations ) ); break; case Node.COMMENT_NODE: // TODO: Preserve comments by inserting them into the linear model too // Could use placeholders for this too, although they'd need to be // inline in certain cases break; } } // End auto-wrapping of bare content if ( wrapping ) { data.push( { 'type': '/paragraph' } ); } // If we're closing a node that doesn't have any children, but could contain a paragraph, // add a paragraph. This prevents things like empty list items childTypes = ve.dm.nodeFactory.getChildNodeTypes( branchType ); if ( branchType !== 'paragraph' && dataElement && data[data.length - 1] === dataElement && !wrapping && !ve.dm.nodeFactory.canNodeContainContent( branchType ) && !ve.dm.nodeFactory.isNodeContent( branchType ) && ( childTypes === null || ve.indexOf( 'paragraph', childTypes ) !== -1 ) ) { data.push( { 'type': 'paragraph', 'internal': { 'generated': 'wrapper' } } ); data.push( { 'type': '/paragraph' } ); } // Close element if ( dataElement ) { data.push( { 'type': '/' + dataElement.type } ); } // Don't return an empty document if ( branchType === 'document' && data.length === 0 ) { return [ { 'type': 'paragraph', 'internal': { 'generated': 'wrapper' } }, { 'type': '/paragraph' } ]; } return data; }; /** * Convert linear model data to an HTML DOM * * @method * @param {Array} data Linear model data * @returns {HTMLElement} Wrapper div containing the resulting HTML */ ve.dm.Converter.prototype.getDomFromData = function ( data ) { var text, i, j, annotations, hash, annotationElement, done, dataElement, wrapper, childDomElement, pre, post, parentDomElement, container = document.createElement( 'div' ), domElement = container, annotationStack = {}; // { hash: DOMnode } for ( i = 0; i < data.length; i++ ) { if ( typeof data[i] === 'string' ) { // Text text = ''; // Continue forward as far as the plain text goes while ( typeof data[i] === 'string' ) { text += data[i]; i++; } // i points to the first non-text thing, go back one so we don't skip this later i--; // Add text domElement.appendChild( document.createTextNode( text ) ); } else if ( ve.isArray( data[i] ) || ( data[i].annotations !== undefined && ve.dm.nodeFactory.isNodeContent( data[i].type ) ) ) { // Annotated text text = ''; while ( ve.isArray( data[i] ) || ( data[i].annotations !== undefined && ve.dm.nodeFactory.isNodeContent( data[i].type ) ) ) { annotations = data[i].annotations || data[i][1]; // Check for closed annotations for ( hash in annotationStack ) { if ( !( hash in annotations ) ) { // It's closed // Traverse up until we hit the node we need to close, and then // traverse up one more time to close that node done = false; while ( !done ) { done = domElement === annotationStack[hash]; // Remove the annotation from the stack delete annotationStack[domElement.veAnnotationHash]; // Remove the temporary veAnnotationHash property delete domElement.veAnnotationHash; // Add text if needed if ( text.length > 0 ) { domElement.appendChild( document.createTextNode( text ) ); text = ''; } // Traverse up domElement = domElement.parentNode; } } } // Check for opened annotations for ( hash in annotations ) { if ( !( hash in annotationStack ) ) { // It's opened annotationElement = this.getDomElementFromDataAnnotation( annotations[hash] ); // Temporary property, will remove this when closing the annotation annotationElement.veAnnotationHash = hash; // Add to the annotation stack annotationStack[hash] = annotationElement; // Add text if needed if ( text.length > 0 ) { domElement.appendChild( document.createTextNode( text ) ); text = ''; } // Attach new node and descend into it domElement.appendChild( annotationElement ); domElement = annotationElement; } } if ( data[i].annotations === undefined ) { text += data[i][0]; } else { // Add text if needed if ( text.length > 0 ) { domElement.appendChild( document.createTextNode( text ) ); text = ''; } // Insert the element domElement.appendChild( this.getDomElementFromDataElement( data[i] ) ); // Increment i once more so we skip over the closing as well i++; } i++; } // We're now at the first non-annotated thing, go back one so we don't skip this later i--; // Add any gathered text if ( text.length > 0 ) { domElement.appendChild( document.createTextNode( text ) ); text = ''; } // Close any remaining annotation nodes while ( domElement.veAnnotationHash !== undefined ) { delete annotationStack[domElement.veAnnotationHash]; delete domElement.veAnnotationHash; domElement = domElement.parentNode; } } else if ( data[i].type !== undefined ) { dataElement = data[i]; // Element if ( dataElement.type === 'alienBlock' || dataElement.type === 'alienInline' ) { // Create nodes from source wrapper = document.createElement( 'div' ); wrapper.innerHTML = dataElement.attributes.html; // Add element - adds all child elements, but there really should only be 1 while ( wrapper.firstChild ) { domElement.appendChild( wrapper.firstChild ); } // Make sure the alien closing is skipped i++; } else if ( dataElement.type.charAt( 0 ) === '/' ) { // Process inner whitespace // whitespace = [ outerPre, innerPre, innerPost, outerPost ] if ( domElement.veInternal && domElement.veInternal.whitespace ) { pre = domElement.veInternal.whitespace[1]; if ( pre ) { if ( domElement.firstChild.nodeType === 3 ) { // First child is a TextNode, prepend to it domElement.firstChild.insertData( 0, pre ); } else { // Prepend a TextNode domElement.insertBefore( document.createTextNode( pre ), domElement.firstChild ); } } post = domElement.veInternal.whitespace[2]; if ( post ) { if ( domElement.lastChild.nodeType === 3 ) { // Last child is a TextNode, append to it domElement.lastChild.appendData( post ); } else { // Append a TextNode domElement.appendChild( document.createTextNode( post ) ); } } } // If closing a generated wrapper node, unwrap it // It would be nicer if we could avoid generating in the first // place, but then remembering where we have to skip ascending // to the parent would be tricky. parentDomElement = domElement.parentNode; if ( domElement.veInternal && domElement.veInternal.generated === 'wrapper' ) { for ( j = 0; j < domElement.childNodes.length; j++ ) { parentDomElement.insertBefore( domElement.childNodes[j], domElement ); } parentDomElement.removeChild( domElement ); } // Clean up the internal data delete domElement.veInternal; // Ascend to parent node domElement = parentDomElement; } else { // Create node from data childDomElement = this.getDomElementFromDataElement( dataElement ); // Add reference to internal data if ( dataElement.internal ) { childDomElement.veInternal = dataElement.internal; } // Add element domElement.appendChild( childDomElement ); // Descend into child node domElement = childDomElement; } } } return container; }; /* Initialization */ ve.dm.converter = new ve.dm.Converter( ve.dm.nodeFactory, ve.dm.annotationFactory );