Ported getDomText and getDomHtml from ve to ve2

Change-Id: I3547d95c2749543eae745722ceddaa0a969e4e80
2024-11-24 14:33:59 +00:00 · 2012-05-02 12:28:27 -07:00 · 2012-05-02 12:28:27 -07:00 · 676ffa4dbc
parent 1d94af144a
commit 676ffa4dbc
1 changed files with 77 additions and 0 deletions
--- a/modules/ve2/ce/ve.ce.js
+++ b/modules/ve2/ce/ve.ce.js
@@ -6,3 +6,80 @@
 ve.ce = {
 	//'factory': Initialized in ve.ce.NodeFactory.js
 };
+
+/**
+ * RegExp pattern for matching plain and non-breaking spaces in HTML text.
+ *
+ * Only the two characters below are matched; other whitespace (tabs, line breaks, ...) is not.
+ *
+ * \u0020 (32)  space
+ * \u00A0 (160) non-breaking space
+ *
+ * The pattern carries the global flag, so its lastIndex is stateful when used with
+ * test() or exec().
+ *
+ * @static
+ * @member
+ */
+ve.ce.whitespacePattern = /[\u0020\u00A0]/g;
+
+/**
+ * Gets the plain text of a DOM element.
+ * 
+ * In the returned string only the contents of text nodes are included.
+ * 
+ * TODO: The idea of using this method over jQuery's .text() was that it will not traverse into
+ * elements that are not contentEditable, however this appears to be missing.
+ * 
+ * @static
+ * @member
+ * @param {String} Plain text of DOM element
+ */
+ve.ce.getDOMText = function( elem ) {
+	var nodeType = elem.nodeType,
+		text = '';
+	if ( nodeType === 1 || nodeType === 9 ) {
+		// Use textContent || innerText for elements
+		if ( typeof elem.textContent === 'string' ) {
+			return elem.textContent;
+		} else if ( typeof elem.innerText === 'string' ) {
+			// Replace IE's carriage returns
+			return elem.innerText.replace( /\r\n/g, '' );
+		} else {
+			// Traverse it's children
+			for ( elem = elem.firstChild; elem; elem = elem.nextSibling) {
+				text += ve.ce.Surface.getDOMText( elem );
+			}
+		}
+	} else if ( nodeType === 3 || nodeType === 4 ) {
+		return elem.nodeValue;
+	}
+	// Return the text, replacing spaces and non-breaking spaces with spaces?
+	// TODO: Why are we replacing spaces (\u0020) with spaces (' ')
+	return text.replace( ve.ce.whitespacePattern, ' ' );
+};
+
+/**
+ * Gets a hash of a DOM element's structure.
+ * 
+ * In the returned string text nodes are repesented as "#" and elements are represented as "<type>"
+ * and "</type>" where "type" is their element name. This effectively generates an HTML
+ * serialization without any attributes or text contents. This can be used to observer structural
+ * changes.
+ * 
+ * @static
+ * @member
+ * @param {String} Hash of DOM element
+ */
+ve.ce.getDOMHash = function( elem ) {
+	var nodeType = elem.nodeType,
+		nodeName = elem.nodeName,
+		hash = '';
+
+	if ( nodeType === 3 || nodeType === 4 ) {
+		return '#';
+	} else if ( nodeType === 1 || nodeType === 9 ) {
+		hash += '<' + nodeName + '>';
+		// Traverse it's children
+		for ( elem = elem.firstChild; elem; elem = elem.nextSibling) {
+			hash += ve.ce.Surface.getDOMHash( elem );
+		}
+		hash += '</' + nodeName + '>';
+	}
+	return hash;
+};