Implement next/prevBreakOffset and word skipping

This provides the functionality for keyboard word skipping
(i.e. pressing ctrl/alt + arrow key).

Bug: 46794
Change-Id: Ib0861fa075df805410717a148b8a6e166d947849
This commit is contained in:
Ed Sanders 2013-04-02 16:50:02 +01:00 committed by Catrope
parent 148b6bf8a8
commit f36e68c333
3 changed files with 105 additions and 27 deletions

View file

@ -32,15 +32,55 @@
return null;
}
/**
 * Find the next word break offset, searching forwards from a position.
 *
 * Delegates to wordbreak.moveBreakOffset with a direction of +1.
 *
 * @param {unicodeJS.TextString} string TextString
 * @param {number} pos Character position
 * @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the previous character is not alphaNumeric
 * @returns {number} Returns the next offset which is a word break
 */
wordbreak.nextBreakOffset = function ( string, pos, onlyAlphaNumeric ) {
	// +1 asks moveBreakOffset to step towards higher offsets
	var FORWARDS = 1;
	return wordbreak.moveBreakOffset( FORWARDS, string, pos, onlyAlphaNumeric );
};
/**
* Evaluates if the specified position within some text is a word boundary.
* @param {string} text Text
* Find the previous word break offset.
* @param {unicodeJS.TextString} string TextString
* @param {number} pos Character position
* @returns {boolean} Is the position a word boundary
* @param {boolean} [onlyAlphaNumeric=false] When set, ignores a break if the previous character is not alphaNumeric
* @returns {number} Returns the previous offset which is a word break
*/
wordbreak.isBreakInText = function ( text, pos ) {
return unicodeJS.wordbreak.isBreakInTextString( new unicodeJS.TextString( text ), pos );
wordbreak.prevBreakOffset = function ( string, pos, onlyAlphaNumeric ) {
	// -1 asks moveBreakOffset to step towards lower offsets
	var BACKWARDS = -1;
	return wordbreak.moveBreakOffset( BACKWARDS, string, pos, onlyAlphaNumeric );
};
/**
 * Find the nearest word break offset when walking in a specified direction.
 *
 * Starting from `pos`, the offset is advanced by `direction` until it lands on a
 * position that unicodeJS.wordbreak.isBreak reports as a break, or until there is
 * no character left to read in that direction.
 *
 * @param {number} direction Direction to search in, should be plus or minus one
 * @param {unicodeJS.TextString} string TextString
 * @param {number} pos Character position
 * @param {boolean} [onlyAlphaNumeric=false] When set, a break is only accepted if the
 *  character just stepped over is in the ALetter, Numeric or Katakana group
 * @returns {number} The offset of the break that was found, or the offset reached
 *  when there was nothing more to read
 */
wordbreak.moveBreakOffset = function ( direction, string, pos, onlyAlphaNumeric ) {
	var group,
		offset = pos,
		// Moving forwards reads the character at the cursor; otherwise the one
		// immediately to its left
		lookOffset = direction > 0 ? 0 : -1;

	while ( string.read( offset + lookOffset ) !== null ) {
		offset += direction;
		if ( !unicodeJS.wordbreak.isBreak( string, offset ) ) {
			// Not a boundary, keep walking
			continue;
		}
		if ( !onlyAlphaNumeric ) {
			break;
		}
		// Classify the character that was just stepped over; only stop here if
		// it belongs to one of the accepted groups
		group = getGroup( string.read( offset - direction + lookOffset ) );
		if ( group === 'ALetter' || group === 'Numeric' || group === 'Katakana' ) {
			break;
		}
	}
	return offset;
};
/**
@ -49,7 +89,7 @@
* @param {number} pos Character position
* @returns {boolean} Is the position a word boundary
*/
wordbreak.isBreakInTextString = function ( string, pos ) {
wordbreak.isBreak = function ( string, pos ) {
// Break at the start and end of text.
// WB1: sot ÷
// WB2: ÷ eot

View file

@ -7,7 +7,7 @@
QUnit.module( 'unicodeJS.wordbreak' );
QUnit.test( 'isBreakInText', function ( assert ) {
QUnit.test( 'isBreak', function ( assert ) {
var i, result, context,
text =
/*jshint quotmark:double */
@ -24,6 +24,7 @@ QUnit.test( 'isBreakInText', function ( assert ) {
// 50 - 60
" c\u0300\u0327",
/*jshint quotmark:single */
textString = new unicodeJS.TextString( text ),
breaks = [
0, 1, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 19,
@ -43,9 +44,59 @@ QUnit.test( 'isBreakInText', function ( assert ) {
text.substring( i, Math.min( i + 4, text.length ) )
;
assert.equal(
unicodeJS.wordbreak.isBreakInText( text, i ),
unicodeJS.wordbreak.isBreak( textString, i ),
result,
'Position ' + i + ' is ' + ( result ? '' : 'not ' ) + 'a break: ' + context
);
}
});
// Walks forwards through every break of a simple sentence, then back again.
QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
	var index,
		position = 0,
		sample = 'The quick brown fox',
		textString = new unicodeJS.TextString( sample ),
		// The first and last values are repeated so that one extra step is
		// attempted once the start/end offset has already been reached
		expected = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ],
		stepCount = expected.length - 2;

	// One assertion per step, in each direction
	QUnit.expect( 2 * stepCount );

	// Forwards: the starting offset (0) is covered by the first two entries
	index = 2;
	while ( index < expected.length ) {
		position = unicodeJS.wordbreak.nextBreakOffset( textString, position );
		assert.equal( position, expected[index], 'Next break is at position ' + expected[index] );
		index++;
	}

	// Backwards: continue from wherever the forward walk finished
	index = stepCount - 1;
	while ( index >= 0 ) {
		position = unicodeJS.wordbreak.prevBreakOffset( textString, position );
		assert.equal( position, expected[index], 'Previous break is at position ' + expected[index] );
		index--;
	}
});
// Exercises word skipping with onlyAlphaNumeric set: breaks that follow whitespace,
// punctuation or other non-word characters are expected to be passed over.
QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
	var i, offset = 0,
		// The runs of spaces are significant: three leading, two between 'quick' and
		// 'brown', and three trailing (52 characters in total). The expected offsets
		// below and the "double whitespace" cases depend on this exact spacing.
		text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
		textString = new unicodeJS.TextString( text ),
		// Forward skips are expected to stop at the end of each word, and finally
		// at the end of the text
		nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
		// Backward skips are expected to stop at the start of each word, and
		// finally at the start of the text
		prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];

	// One assertion per expected offset, plus the six spot checks at the end
	QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );

	for ( i = 0; i < nextBreaks.length; i++ ) {
		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
		assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );
	}
	// Walk back from where the forward pass finished
	for ( i = 0; i < prevBreaks.length; i++ ) {
		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
		assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );
	}

	// Spot checks from specific starting offsets
	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),
		12, 'Jump to end of word when starting in middle of word');
	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),
		6, 'Jump to end of word when starting at start of word');
	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),
		19, 'Jump to end of word when starting in double whitespace');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),
		14, 'Jump to start of word when starting in middle of word');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),
		3, 'Jump to start of word when starting at end of word');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),
		7, 'Jump to start of word when starting in double whitespace');
});

View file

@ -671,7 +671,7 @@ ve.dm.ElementLinearData.prototype.getNearestStructuralOffset = function ( offset
* @returns {ve.Range} Range around nearest word boundaries
*/
ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
var offsetLeft, offsetRight, i,
var offsetLeft, offsetRight,
dataString = new ve.dm.DataString( this.getData() );
offset = this.getNearestContentOffset( offset );
@ -679,10 +679,10 @@ ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
// If the cursor offset is a break (i.e. the start/end of word) we should
// check one position either side to see if there is a non-break
// and if so, move the offset accordingly
if ( unicodeJS.wordbreak.isBreakInTextString( dataString, offset ) ) {
if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset + 1 ) ) {
if ( unicodeJS.wordbreak.isBreak( dataString, offset ) ) {
if ( !unicodeJS.wordbreak.isBreak( dataString, offset + 1 ) ) {
offset++;
} else if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset - 1 ) ) {
} else if ( !unicodeJS.wordbreak.isBreak( dataString, offset - 1 ) ) {
offset--;
} else {
// just return one character to the right, unless we are at the end
@ -695,21 +695,8 @@ ve.dm.ElementLinearData.prototype.getNearestWordRange = function ( offset ) {
}
}
i = offset;
// Search left and right for next break points
while ( dataString.read( i++ ) !== null ) {
offsetRight = i;
if ( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
break;
}
}
i = offset;
while ( dataString.read( i-- ) !== null ) {
offsetLeft = i;
if ( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
break;
}
}
offsetRight = unicodeJS.wordbreak.nextBreakOffset( dataString, offset );
offsetLeft = unicodeJS.wordbreak.prevBreakOffset( dataString, offset );
return new ve.Range( offsetLeft, offsetRight );
};