Ported getDomText and getDomHtml from ve to ve2

Change-Id: I3547d95c2749543eae745722ceddaa0a969e4e80
2024-11-24 22:35:41 +00:00 · 2012-05-02 12:28:27 -07:00 · 2012-05-02 12:28:27 -07:00 · 676ffa4dbc
parent 1d94af144a
commit 676ffa4dbc
1 changed files with 77 additions and 0 deletions
--- a/modules/ve2/ce/ve.ce.js
+++ b/modules/ve2/ce/ve.ce.js
@ -6,3 +6,80 @@
 // Namespace object for ve.ce. It is created empty here; members are attached to it
 // after creation (see the assignments that follow in this file).
 ve.ce = {
 	// 'factory': not declared inline - initialized in ve.ce.NodeFactory.js
 };
 /**
 * RegExp pattern for matching all whitespace characters in HTML text.
 *
 * Matches a single character from the following set:
 *
 * \u0020 (32)  space
 * \u00A0 (160) non-breaking space
 *
 * The pattern carries the global flag, so String.replace() substitutes every occurrence.
 * Because a global RegExp keeps state in lastIndex, avoid calling test() or exec() on this
 * shared instance.
 *
 * @static
 * @member
 */
 ve.ce.whitespacePattern = /[\u0020\u00A0]/g;
 /**
 * Gets the plain text of a DOM node.
 *
 * In the returned string only the contents of text nodes are included.
 *
 * For element and document nodes the browser's own textContent (or innerText, with "\r\n"
 * sequences removed) is returned when it is available as a string; otherwise the child nodes are
 * traversed recursively. For text and CDATA nodes the node value is returned. Any other node
 * type yields an empty string.
 *
 * Note that only the result of the recursive traversal passes through the whitespace
 * normalization at the end of the function; the textContent, innerText and text node paths
 * return early with the text exactly as the browser reports it.
 *
 * TODO: The idea of using this method over jQuery's .text() was that it will not traverse into
 * elements that are not contentEditable, however this appears to be missing.
 *
 * @static
 * @member
 * @param {Node} elem DOM node to get the text of
 * @returns {String} Plain text of DOM node
 */
 ve.ce.getDOMText = function( elem ) {
 	var nodeType = elem.nodeType,
 		text = '',
 		child;
 	if ( nodeType === 1 || nodeType === 9 ) {
 		// Use textContent || innerText for elements
 		if ( typeof elem.textContent === 'string' ) {
 			return elem.textContent;
 		} else if ( typeof elem.innerText === 'string' ) {
 			// Replace IE's carriage returns
 			return elem.innerText.replace( /\r\n/g, '' );
 		} else {
 			// Traverse its children. The recursion must target this function itself: it
 			// lives on ve.ce, not on ve.ce.Surface.
 			for ( child = elem.firstChild; child; child = child.nextSibling ) {
 				text += ve.ce.getDOMText( child );
 			}
 		}
 	} else if ( nodeType === 3 || nodeType === 4 ) {
 		return elem.nodeValue;
 	}
 	// Return the text, replacing spaces and non-breaking spaces with spaces?
 	// TODO: Why are we replacing spaces (\u0020) with spaces (' ')
 	return text.replace( ve.ce.whitespacePattern, ' ' );
 };
 /**
 * Gets a hash of a DOM node's structure.
 *
 * In the returned string text nodes are represented as "#" and elements are represented as
 * "<type>" and "</type>" where "type" is their node name. This effectively generates an HTML
 * serialization without any attributes or text contents. This can be used to observe structural
 * changes.
 *
 * Text and CDATA nodes produce "#", element and document nodes produce their opening tag, the
 * hashes of all their children and their closing tag. Any other node type yields an empty
 * string.
 *
 * @static
 * @member
 * @param {Node} elem DOM node to get the hash of
 * @returns {String} Hash of DOM node
 */
 ve.ce.getDOMHash = function( elem ) {
 	var nodeType = elem.nodeType,
 		nodeName = elem.nodeName,
 		hash = '',
 		child;
 	if ( nodeType === 3 || nodeType === 4 ) {
 		return '#';
 	} else if ( nodeType === 1 || nodeType === 9 ) {
 		hash += '<' + nodeName + '>';
 		// Traverse its children. The recursion must target this function itself: it lives
 		// on ve.ce, not on ve.ce.Surface.
 		for ( child = elem.firstChild; child; child = child.nextSibling ) {
 			hash += ve.ce.getDOMHash( child );
 		}
 		hash += '</' + nodeName + '>';
 	}
 	return hash;
 };