Implement next/prevBreakOffset and word skipping

This provides the functionality for keyboard word skipping (i.e. pressing ctrl/alt + arrow key). Bug: 46794 Change-Id: Ib0861fa075df805410717a148b8a6e166d947849
2024-11-28 08:10:35 +00:00 · 2013-04-02 16:50:02 +01:00 · 2013-04-02 16:50:02 +01:00 · f36e68c333
parent 148b6bf8a8
commit f36e68c333
3 changed files with 105 additions and 27 deletions
--- a/modules/unicodejs/unicodejs.wordbreak.js
+++ b/modules/unicodejs/unicodejs.wordbreak.js
@ -32,15 +32,55 @@
 		return null;
 	}

+	/**
+	 * Find the next word break offset.
+	 * @param {unicodeJS.TextString} string TextString
+	 * @param {number} pos Character position
+	 * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the previous character is not alphaNumeric
+	 * @returns {number} Returns the next offset which is a word break
+	 */
+	wordbreak.nextBreakOffset = function( string, pos, onlyAlphaNumeric ) {
+		return wordbreak.moveBreakOffset( 1, string, pos, onlyAlphaNumeric );
+	};

 	/**
-	 * Evaluates if the specified position within some text is a word boundary.
-	 * @param {string} text Text
+	 * Find the previous word break offset.
+	 * @param {unicodeJS.TextString} string TextString
 	 * @param {number} pos Character position
-	 * @returns {boolean} Is the position a word boundary
+	 * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the character following it is not alphaNumeric
+	 * @returns {number} Returns the previous offset which is a word break
 	 */
-	wordbreak.isBreakInText = function ( text, pos ) {
-		return unicodeJS.wordbreak.isBreakInTextString( new unicodeJS.TextString( text ), pos );
+	wordbreak.prevBreakOffset = function( string, pos, onlyAlphaNumeric ) {
+		return wordbreak.moveBreakOffset( -1, string, pos, onlyAlphaNumeric );
+	};
+
+	/**
+	 * Find the next word break offset in a specified direction.
+	 * @param {number} direction Direction to search in, should be plus or minus one
+	 * @param {unicodeJS.TextString} string TextString
+	 * @param {number} pos Character position
+	 * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the character just stepped over is not in the ALetter, Numeric or Katakana group
+	 * @returns {number} Returns the next offset in the given direction which is a word break
+	 */
+	wordbreak.moveBreakOffset = function( direction, string, pos, onlyAlphaNumeric ) {
+		var lastGroup, i = pos,
+			// when moving backwards, use the character to the left of the cursor
+			readCharOffset = direction > 0 ? 0 : -1;
+		// Search in the given direction for the next break point
+		while ( string.read( i + readCharOffset ) !== null ) {
+			i += direction;
+			if ( unicodeJS.wordbreak.isBreak( string, i ) ) {
+				// Check previous character was alpha-numeric if required
+				if ( onlyAlphaNumeric ) {
+					lastGroup = getGroup( string.read( i - direction + readCharOffset ) );
+					if( lastGroup !== 'ALetter' && lastGroup !== 'Numeric' && lastGroup !== 'Katakana' ) {
+						continue;
+					}
+				}
+				break;
+			}
+		}
+		return i;
 	};

 	/**
@ -49,7 +89,7 @@
 	 * @param {number} pos Character position
 	 * @returns {boolean} Is the position a word boundary
 	 */
-	wordbreak.isBreakInTextString = function ( string, pos ) {
+	wordbreak.isBreak = function ( string, pos ) {
 		// Break at the start and end of text.
 		// WB1: sot ÷
 		// WB2: ÷ eot
--- a/modules/unicodejs/unicodejs.wordbreak.test.js
+++ b/modules/unicodejs/unicodejs.wordbreak.test.js
@ -7,7 +7,7 @@

 QUnit.module( 'unicodeJS.wordbreak' );

-QUnit.test( 'isBreakInText', function ( assert ) {
+QUnit.test( 'isBreak', function ( assert ) {
 	var i, result, context,
 		text =
 			/*jshint quotmark:double */
@ -24,6 +24,7 @@ QUnit.test( 'isBreakInText', function ( assert ) {
 			// 50 - 60
 			" c\u0300\u0327",
 			/*jshint quotmark:single */
+		textString = new unicodeJS.TextString( text ),
 		breaks = [
 			0, 1, 6, 7, 8, 9, 10,
 			11, 12, 13, 14, 15, 16, 17, 19,
@ -43,9 +44,59 @@ QUnit.test( 'isBreakInText', function ( assert ) {
 			text.substring( i, Math.min( i + 4, text.length ) )
 		;
 		assert.equal(
-			unicodeJS.wordbreak.isBreakInText( text, i ),
+			unicodeJS.wordbreak.isBreak( textString, i ),
 			result,
 			'Position ' + i + ' is ' + ( result ? '' : 'not ' ) + 'a break: ' + context
 		);
 	}
 });
+
+QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
+	var i, offset = 0,
+		text = 'The quick brown fox',
+		textString = new unicodeJS.TextString( text ),
+		breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ];
+
+	QUnit.expect( 2*(breaks.length - 2) );
+
+	for ( i = 2; i < breaks.length; i++ ) {
+		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset );
+		assert.equal( offset, breaks[i], 'Next break is at position ' + breaks[i] );
+	}
+	for ( i = breaks.length - 3; i >= 0; i-- ) {
+		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset );
+		assert.equal( offset, breaks[i], 'Previous break is at position ' + breaks[i] );
+	}
+});
+
+QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
+	var i, offset = 0,
+		text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
+		textString = new unicodeJS.TextString( text ),
+		nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
+		prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];
+
+	QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );
+
+	for ( i = 0; i < nextBreaks.length; i++ ) {
+		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
+		assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );
+	}
+	for ( i = 0; i < prevBreaks.length; i++ ) {
+		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
+		assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );
+	}
+
+	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),
+		 12, 'Jump to end of word when starting in middle of word');
+	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),
+		 6, 'Jump to end of word when starting at start of word');
+	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),
+		 19, 'Jump to end of word when starting in double whitespace');
+	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),
+		 14, 'Jump to start of word when starting in middle of word');
+	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),
+		 3, 'Jump to start of word when starting at end of word');
+	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),
+		 7, 'Jump to start of word when starting in double whitespace');
+});
--- a/modules/ve/dm/lineardata/ve.dm.ElementLinearData.js
+++ b/modules/ve/dm/lineardata/ve.dm.ElementLinearData.js
@ -671,7 +671,7 @@ ve.dm.ElementLinearData.prototype.getNearestStructuralOffset = function ( offset
 * @returns {ve.Range} Range around nearest word boundaries
 */
 ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
-	var offsetLeft, offsetRight, i,
+	var offsetLeft, offsetRight,
 		dataString = new ve.dm.DataString( this.getData() );

 	offset = this.getNearestContentOffset( offset );
@ -679,10 +679,10 @@ ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
 	// If the cursor offset is a break (i.e. the start/end of word) we should
 	// check one position either side to see if there is a non-break
 	// and if so, move the offset accordingly
-	if ( unicodeJS.wordbreak.isBreakInTextString( dataString, offset ) ) {
-		if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset + 1 ) ) {
+	if ( unicodeJS.wordbreak.isBreak( dataString, offset ) ) {
+		if ( !unicodeJS.wordbreak.isBreak( dataString, offset + 1 ) ) {
 			offset++;
-		} else if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset - 1 ) ) {
+		} else if ( !unicodeJS.wordbreak.isBreak( dataString, offset - 1 ) ) {
 			offset--;
 		} else {
 			// just return one character to the right, unless we are at the end
@ -695,21 +695,8 @@ ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
 		}
 	}

-	i = offset;
-	// Search left and right for next break points
-	while ( dataString.read( i++ ) !== null ) {
-		offsetRight = i;
-		if ( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
-			break;
-		}
-	}
-	i = offset;
-	while ( dataString.read( i-- ) !== null ) {
-		offsetLeft = i;
-		if ( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
-			break;
-		}
-	}
+	offsetRight = unicodeJS.wordbreak.nextBreakOffset( dataString, offset );
+	offsetLeft = unicodeJS.wordbreak.prevBreakOffset( dataString, offset );

 	return new ve.Range( offsetLeft, offsetRight );
 };