Implement next/prevBreakOffset and word skipping

This provides the functionality for keyboard word skipping (i.e. pressing Ctrl/Alt + an arrow key).

Bug: 46794
Change-Id: Ib0861fa075df805410717a148b8a6e166d947849
2024-09-24 18:58:42 +00:00 · 2013-04-02 16:50:02 +01:00 · 2013-04-02 16:50:02 +01:00 · f36e68c333
parent 148b6bf8a8
commit f36e68c333
3 changed files with 105 additions and 27 deletions
--- a/modules/unicodejs/unicodejs.wordbreak.js
+++ b/modules/unicodejs/unicodejs.wordbreak.js
@ -32,15 +32,55 @@
 		return null;
 	}
 	/**
 	 * Find the next word break offset.
 	 *
 	 * Delegates to wordbreak.moveBreakOffset with a direction of +1.
 	 *
 	 * @param {unicodeJS.TextString} string TextString
 	 * @param {number} pos Character position
 	 * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the previous character
 	 *  (the one immediately before the break) is not alphaNumeric
 	 * @returns {number} Returns the next offset which is a word break
 	 */
 	wordbreak.nextBreakOffset = function( string, pos, onlyAlphaNumeric ) {
 		return wordbreak.moveBreakOffset( 1, string, pos, onlyAlphaNumeric );
 	};
 	/**
-	 * Evaluates if the specified position within some text is a word boundary.
+	 * Find the previous word break offset.
-	 * @param {string} text Text
+	 * @param {unicodeJS.TextString} string TextString
 	 * @param {number} pos Character position
-	 * @returns {boolean} Is the position a word boundary
+	 * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the character just stepped over (the one immediately after the break) is not alphaNumeric
 	 * @returns {number} Returns the previous offset which is a word break
 	 */
-	wordbreak.isBreakInText = function ( text, pos ) {
+	wordbreak.prevBreakOffset = function( string, pos, onlyAlphaNumeric ) {
-		return unicodeJS.wordbreak.isBreakInTextString( new unicodeJS.TextString( text ), pos );
+		return wordbreak.moveBreakOffset( -1, string, pos, onlyAlphaNumeric );
 	};
 	/**
 	 * Step from a position, one offset at a time, until a word break is reached.
 	 *
 	 * Works in either direction: the character about to be stepped over is the one at the
 	 * cursor when moving forwards and the one to the left of the cursor when moving backwards.
 	 * Stepping stops early when there is no such character left to step over.
 	 *
 	 * @param {number} direction Direction to search in, should be plus or minus one
 	 * @param {unicodeJS.TextString} string TextString
 	 * @param {number} pos Character position
 	 * @param {boolean} [onlyAlphaNumeric=false] When set, a break is only accepted if the
 	 *  character just stepped over belongs to the ALetter, Numeric or Katakana group
 	 * @returns {number} Offset of the first accepted word break in the given direction, or
 	 *  the offset at which the text ran out
 	 */
 	wordbreak.moveBreakOffset = function( direction, string, pos, onlyAlphaNumeric ) {
 		var steppedGroup,
 			offset = pos,
 			// Index adjustment that selects the character about to be stepped over:
 			// the one at the cursor going forwards, the one left of it going backwards
 			lookOffset = direction > 0 ? 0 : -1;
 		// Keep stepping while there is still a character to step over
 		while ( string.read( offset + lookOffset ) !== null ) {
 			offset += direction;
 			if ( !unicodeJS.wordbreak.isBreak( string, offset ) ) {
 				continue;
 			}
 			// Any break is acceptable unless the caller asked for alpha-numeric ones only
 			if ( !onlyAlphaNumeric ) {
 				break;
 			}
 			// Undo the step to look again at the character that was just stepped over
 			steppedGroup = getGroup( string.read( offset - direction + lookOffset ) );
 			if ( steppedGroup === 'ALetter' || steppedGroup === 'Numeric' || steppedGroup === 'Katakana' ) {
 				break;
 			}
 		}
 		return offset;
 	};
 	/**
@ -49,7 +89,7 @@
 	 * @param {number} pos Character position
 	 * @returns {boolean} Is the position a word boundary
 	 */
-	wordbreak.isBreakInTextString = function ( string, pos ) {
+	wordbreak.isBreak = function ( string, pos ) {
 		// Break at the start and end of text.
 		// WB1: sot ÷
 		// WB2: ÷ eot
--- a/modules/unicodejs/unicodejs.wordbreak.test.js
+++ b/modules/unicodejs/unicodejs.wordbreak.test.js
@ -7,7 +7,7 @@
 QUnit.module( 'unicodeJS.wordbreak' );
-QUnit.test( 'isBreakInText', function ( assert ) {
+QUnit.test( 'isBreak', function ( assert ) {
 	var i, result, context,
 		text =
 			/*jshint quotmark:double */
@ -24,6 +24,7 @@ QUnit.test( 'isBreakInText', function ( assert ) {
 			// 50 - 60
 			" c\u0300\u0327",
 			/*jshint quotmark:single */
 		textString = new unicodeJS.TextString( text ),
 		breaks = [
 			0, 1, 6, 7, 8, 9, 10,
 			11, 12, 13, 14, 15, 16, 17, 19,
@ -43,9 +44,59 @@ QUnit.test( 'isBreakInText', function ( assert ) {
 			text.substring( i, Math.min( i + 4, text.length ) )
 		;
 		assert.equal(
-			unicodeJS.wordbreak.isBreakInText( text, i ),
+			unicodeJS.wordbreak.isBreak( textString, i ),
 			result,
 			'Position ' + i + ' is ' + ( result ? '' : 'not ' ) + 'a break: ' + context
 		);
 	}
 });
 // Walks a short sentence forwards and then backwards, one word break at a time.
 QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
 	var index,
 		position = 0,
 		sample = 'The quick brown fox',
 		sampleString = new unicodeJS.TextString( sample ),
 		// The first and last entries are doubled: the test expects a move made at
 		// either end of the text to stay at that end
 		expected = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ],
 		stepCount = expected.length - 2;
 	// One assertion per step, in each direction
 	QUnit.expect( 2 * stepCount );
 	// Forwards from offset 0, ending with a repeated move at the end of the text
 	for ( index = 2; index < expected.length; index++ ) {
 		position = unicodeJS.wordbreak.nextBreakOffset( sampleString, position );
 		assert.equal( position, expected[index], 'Next break is at position ' + expected[index] );
 	}
 	// Backwards from where the forward walk stopped, ending with a repeated move at offset 0
 	for ( index = stepCount - 1; index >= 0; index-- ) {
 		position = unicodeJS.wordbreak.prevBreakOffset( sampleString, position );
 		assert.equal( position, expected[index], 'Previous break is at position ' + expected[index] );
 	}
 });
 // Exercises nextBreakOffset/prevBreakOffset with the third argument (onlyAlphaNumeric) set
 // to true, on text containing leading/trailing/double spaces, punctuation runs, a decimal
 // number and a hiragana run followed by a katakana run.
 QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
 	var i, offset = 0,
 		text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
 		textString = new unicodeJS.TextString( text ),
 		// Expected stops when walking forwards from offset 0; the final entry (52) is the
 		// length of the text
 		nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
 		// Expected stops when walking backwards from where the forward walk ended
 		prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];
 	// One assertion per expected stop, plus the six stand-alone assertions at the end
 	QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );
 	for ( i = 0; i < nextBreaks.length; i++ ) {
 		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
 		assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );
 	}
 	// offset is deliberately not reset: the backward walk starts from the last forward stop
 	for ( i = 0; i < prevBreaks.length; i++ ) {
 		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
 		assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );
 	}
 	// Stand-alone checks from fixed starting offsets, independent of the walks above
 	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),
 		 12, 'Jump to end of word when starting in middle of word');
 	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),
 		 6, 'Jump to end of word when starting at start of word');
 	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),
 		 19, 'Jump to end of word when starting in double whitespace');
 	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),
 		 14, 'Jump to start of word when starting in middle of word');
 	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),
 		 3, 'Jump to start of word when starting at end of word');
 	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),
 		 7, 'Jump to start of word when starting in double whitespace');
 });
--- a/modules/ve/dm/lineardata/ve.dm.ElementLinearData.js
+++ b/modules/ve/dm/lineardata/ve.dm.ElementLinearData.js
@ -671,7 +671,7 @@ ve.dm.ElementLinearData.prototype.getNearestStructuralOffset = function ( offset
 * @returns {ve.Range} Range around nearest word boundaries
 */
 ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
-	var offsetLeft, offsetRight, i,
+	var offsetLeft, offsetRight,
 		dataString = new ve.dm.DataString( this.getData() );
 	offset = this.getNearestContentOffset( offset );
@ -679,10 +679,10 @@ ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
 	// If the cursor offset is a break (i.e. the start/end of word) we should
 	// check one position either side to see if there is a non-break
 	// and if so, move the offset accordingly
-	if ( unicodeJS.wordbreak.isBreakInTextString( dataString, offset ) ) {
+	if ( unicodeJS.wordbreak.isBreak( dataString, offset ) ) {
-		if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset + 1 ) ) {
+		if ( !unicodeJS.wordbreak.isBreak( dataString, offset + 1 ) ) {
 			offset++;
-		} else if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset - 1 ) ) {
+		} else if ( !unicodeJS.wordbreak.isBreak( dataString, offset - 1 ) ) {
 			offset--;
 		} else {
 			// just return one character to the right, unless we are at the end
@ -695,21 +695,8 @@ ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
 		}
 	}
-	i = offset;
+	offsetRight = unicodeJS.wordbreak.nextBreakOffset( dataString, offset );
-	// Search left and right for next break points
+	offsetLeft = unicodeJS.wordbreak.prevBreakOffset( dataString, offset );
 	while ( dataString.read( i++ ) !== null ) {
 		offsetRight = i;
 		if ( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
 			break;
 		}
 	}
 	i = offset;
 	while ( dataString.read( i-- ) !== null ) {
 		offsetLeft = i;
 		if ( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
 			break;
 		}
 	}
 	return new ve.Range( offsetLeft, offsetRight );
 };