mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-12-01 17:36:35 +00:00
1c78d0a38c
unicodejs.js: * add splitClusters(text) and splitCharacters(text) methods unicodejs.textstring.js: * change internal representation from a char string to a list of grapheme clusters unicodejs.wordbreak.js: * change getGroup to work on the first character of a grapheme cluster ve.js: * Use new unicodejs.splitClusters function Bug: 48975 Change-Id: I202b98199d2780534d1e02519b72579ba796f08f
114 lines
4 KiB
JavaScript
114 lines
4 KiB
JavaScript
/*!
|
|
* Wordbreak module tests
|
|
*
|
|
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
|
* @license The MIT License (MIT); see LICENSE.txt
|
|
*/
|
|
|
|
QUnit.module( 'unicodeJS.wordbreak' );
|
|
|
|
QUnit.test( 'isBreak', function ( assert ) {
|
|
var i, result, context,
|
|
text =
|
|
/*jshint quotmark:double */
|
|
// 0 - 10
|
|
"\u0300xyz'd a' " +
|
|
// 10 - 20
|
|
"'a a-b 1a\r" +
|
|
// 20 - 30
|
|
"\nカタカナ3,1.2" +
|
|
// 30 - 40
|
|
" a_b_3_ナ_ " +
|
|
// 40 - 50
|
|
"汉字/漢字 c\u0300\u0327k " +
|
|
// 50 - 60
|
|
"\ud800\udf08" + // U+10308 OLD ITALIC LETTER THE
|
|
"\ud800\udf08\u0302" + // U+10308 OLD ITALIC LETTER THE + combining circumflex
|
|
"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA
|
|
" pad " +
|
|
"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA
|
|
"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA
|
|
// 60 - 65: "a." tests end of para
|
|
" c\u0300\u0327 a.",
|
|
/*jshint quotmark:single */
|
|
textString = new unicodeJS.TextString( text ),
|
|
breaks = [
|
|
0, 1, 6, 7, 8, 9, 10,
|
|
11, 12, 13, 14, 15, 16, 17, 19,
|
|
21, 25, 30,
|
|
31, 39, 40,
|
|
41, 42, 43, 44, 45, 46, 48, 49, 50,
|
|
51, 52, // TODO: these should not be in the list
|
|
53, 54, 57, 58,
|
|
59, // TODO: this should not be in the list
|
|
60,
|
|
61, 62, 63, 64, 65
|
|
];
|
|
|
|
QUnit.expect( textString.getLength() + 1 );
|
|
|
|
for ( i = 0; i <= textString.getLength(); i++ ) {
|
|
result = ( breaks.indexOf( i ) !== -1 );
|
|
context =
|
|
textString.substring( Math.max( i - 4, 0 ), i ).getString() +
|
|
'│' +
|
|
textString.substring( i, Math.min( i + 4, text.length ) ).getString()
|
|
;
|
|
assert.equal(
|
|
unicodeJS.wordbreak.isBreak( textString, i ),
|
|
result,
|
|
'Position ' + i + ' is ' + ( result ? '' : 'not ' ) + 'a break: ' + context
|
|
);
|
|
}
|
|
});
|
|
|
|
QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
|
|
var i, offset = 0,
|
|
text = 'The quick brown fox',
|
|
textString = new unicodeJS.TextString( text ),
|
|
breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ];
|
|
|
|
QUnit.expect( 2*(breaks.length - 2) );
|
|
|
|
for ( i = 2; i < breaks.length; i++ ) {
|
|
offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset );
|
|
assert.equal( offset, breaks[i], 'Next break is at position ' + breaks[i] );
|
|
}
|
|
for ( i = breaks.length - 3; i >= 0; i-- ) {
|
|
offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset );
|
|
assert.equal( offset, breaks[i], 'Previous break is at position ' + breaks[i] );
|
|
}
|
|
});
|
|
|
|
QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
|
|
var i, offset = 0,
|
|
text = ' The quick brown ..fox jumps... 3.14159 すどくスドク ',
|
|
textString = new unicodeJS.TextString( text ),
|
|
nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
|
|
prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];
|
|
|
|
QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );
|
|
|
|
for ( i = 0; i < nextBreaks.length; i++ ) {
|
|
offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
|
|
assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );
|
|
}
|
|
for ( i = 0; i < prevBreaks.length; i++ ) {
|
|
offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
|
|
assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );
|
|
}
|
|
|
|
assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),
|
|
12, 'Jump to end of word when starting in middle of word');
|
|
assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),
|
|
6, 'Jump to end of word when starting at start of word');
|
|
assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),
|
|
19, 'Jump to end of word when starting in double whitespace');
|
|
assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),
|
|
14, 'Jump to start of word when starting in middle of word');
|
|
assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),
|
|
3, 'Jump to start of word when starting at end of word');
|
|
assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),
|
|
7, 'Jump to start of word when starting in double whitespace');
|
|
});
|