mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-09-25 11:16:51 +00:00
Separate out UnicodeJS tests properly
Also fix some comment & whitespace inconsistencies. Change-Id: I71717643678445590820e174e6ed2e5ac58103c2
This commit is contained in:
parent
330c39215c
commit
6d999d8504
|
@ -26,7 +26,9 @@
|
|||
<script src="unicodejs.wordbreakproperties.js"></script>
|
||||
<script src="unicodejs.wordbreak.js"></script>
|
||||
|
||||
<script src="unicodejs.wordbreak.test.js"></script>
|
||||
<script src="test/unicodejs.test.js"></script>
|
||||
<script src="test/unicodejs.graphemebreak.test.js"></script>
|
||||
<script src="test/unicodejs.wordbreak.test.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<div id="qunit"></div>
|
||||
|
|
34
modules/unicodejs/test/unicodejs.graphemebreak.test.js
Normal file
34
modules/unicodejs/test/unicodejs.graphemebreak.test.js
Normal file
|
@ -0,0 +1,34 @@
|
|||
/*!
|
||||
* UnicodeJS Grapheme Break module tests
|
||||
*
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
QUnit.module( 'unicodeJS.graphemebreak' );

/**
 * splitClusters: joining a list of known grapheme clusters into one string
 * and splitting that string again must give back exactly the same list.
 */
QUnit.test( 'splitClusters', 1, function ( assert ) {
	var clusters, joined, actual;

	clusters = [
		// ASCII letters and spaces
		'a', ' ', ' ', 'b',
		// Katakana, one cluster per character
		'カ', 'タ', 'カ', 'ナ',
		// c followed by two combining characters
		'c\u0300\u0327',
		// U+10308 OLD ITALIC LETTER THE (written as a surrogate pair)
		'\ud800\udf08',
		// U+10308 followed by a combining circumflex
		'\ud800\udf08\u0302',
		// CR LF is expected to stay together; the lone LF is its own cluster
		'\r\n',
		'\n',
		// Hangul jamo L+V
		'\u1104\u1173',
		// Hangul jamo L+V+T
		'\u1105\u1161\u11a8',
		// Two regional indicator characters
		'\ud83c\udded\ud83c\uddf0'
	];
	joined = clusters.join( '' );
	actual = unicodeJS.graphemebreak.splitClusters( joined );

	assert.deepEqual( actual, clusters, 'Split clusters' );
});
|
128
modules/unicodejs/test/unicodejs.test.js
Normal file
128
modules/unicodejs/test/unicodejs.test.js
Normal file
|
@ -0,0 +1,128 @@
|
|||
/*!
|
||||
* UnicodeJS Base module tests
|
||||
*
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
QUnit.module( 'unicodeJS' );

/**
 * charRangeArrayRegexp: check the regexp source produced for a list of code
 * points / code point ranges, and that ranges touching the surrogate block
 * are rejected with an Error.
 */
QUnit.test( 'charRangeArrayRegexp', function ( assert ) {
	var equalityCases, throwCases;

	// ranges: argument passed to charRangeArrayRegexp
	// expected: exact string the call must return
	// message: assertion message
	equalityCases = [
		{
			ranges: [0x0040],
			expected: '\\u0040',
			message: 'single BMP character'
		},
		{
			ranges: [0xFFFF],
			expected: '\\uffff',
			message: 'highest BMP character'
		},
		{
			ranges: [0x005F, [0x203F, 0x2040], 0x2054, [0xFE33, 0xFE34],
				[0xFE4D, 0xFE4F], 0xFF3F],
			expected: '[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]',
			message: 'multiple BMP ranges (= ExtendNumLet from wordbreak rules)'
		},
		{
			ranges: [0xD7FF],
			expected: '\\ud7ff',
			message: 'just below surrogate range'
		},
		{
			ranges: [0xE000],
			expected: '\\ue000',
			message: 'just above surrogate range'
		},
		{
			ranges: [0x10000],
			expected: '\\ud800\\udc00',
			message: 'lowest non-BMP character'
		},
		{
			ranges: [0x10001],
			expected: '\\ud800\\udc01',
			message: 'second-lowest non-BMP character'
		},
		{
			ranges: [0x103FF],
			expected: '\\ud800\\udfff',
			message: 'highest character with D800 leading surrogate'
		},
		{
			ranges: [0x10400],
			expected: '\\ud801\\udc00',
			message: 'lowest character with D801 leading surrogate'
		},
		{
			ranges: [[0xFF00, 0xFFFF]],
			expected: '[\\uff00-\\uffff]',
			message: 'single range at top of BMP'
		},
		{
			ranges: [[0xFF00, 0x10000]],
			expected: '[\\uff00-\\uffff]|\\ud800\\udc00',
			message: 'single range spanning BMP and non-BMP'
		},
		{
			ranges: [0xFFFF, 0x10000, 0x10002],
			// TODO: could compact
			expected: '\\uffff|\\ud800\\udc00|\\ud800\\udc02',
			message: 'single characters, both BMP and non-BMP'
		},
		{
			ranges: [[0x0300, 0x0400], 0x10FFFF],
			expected: '[\\u0300-\\u0400]|\\udbff\\udfff',
			message: 'BMP range and non-BMP character'
		},
		{
			ranges: [[0xFF00, 0x103FF]],
			expected: '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]',
			message: 'range to top of D800 leading surrogate range'
		},
		{
			ranges: [[0xFF00, 0x10400]],
			expected: '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00',
			message: 'range to start of D801 leading surrogate range'
		},
		{
			ranges: [[0xFF00, 0x10401]],
			expected: '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]',
			message: 'range past start of D801 leading surrogate range'
		},
		{
			ranges: [[0xFF00, 0x15555]],
			expected: '[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]',
			message: 'range spanning multiple leading surrogate ranges'
		},
		{
			ranges: [[0x10454, 0x10997]],
			expected: '\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]',
			message: 'range starting within one leading surrogate range, and ending in the next'
		},
		{
			ranges: [[0x20222, 0x29999]],
			expected: '\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]',
			message: 'range starting within one leading surrogate range, and ending in a distant one'
		},
		{
			ranges: [0x00AD, [0x0600, 0x0604], 0x06DD, 0x070F,
				[0x200E, 0x200F], [0x202A, 0x202E], [0x2060, 0x2064],
				[0x206A, 0x206F], 0xFEFF, [0xFFF9, 0xFFFB],
				0x110BD, [0x1D173, 0x1D17A],
				0xE0001, [0xE0020, 0xE007F]],
			// TODO: could compact
			expected: '[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' +
				'\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' +
				'\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' +
				'|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' +
				'|\\udb40[\\udc20-\\udc7f]',
			message: 'multiple BMP and non-BMP ranges (= Format from wordbreak rules)'
		},
		{
			ranges: [[0x0, 0xD7FF], [0xE000, 0xFFFF], [0x10000, 0x10FFFF]],
			expected: '[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]',
			message: 'largest possible range'
		}
	];

	// Inputs that include or overlap surrogate code points; each must throw
	throwCases = [
		{ ranges: [0xD800], message: 'surrogate character U+D800' },
		{ ranges: [0xDFFF], message: 'surrogate character U+DFFF' },
		{ ranges: [[0xCCCC, 0xDDDD]], message: 'surrogate overlap 1' },
		{ ranges: [[0xDDDD, 0xEEEE]], message: 'surrogate overlap 2' },
		{ ranges: [[0xDDDD, 0xEEEEE]], message: 'surrogate overlap 3' },
		{ ranges: [[0xCCCC, 0xEEEE]], message: 'surrogate overlap 4' }
	];

	QUnit.expect( equalityCases.length + throwCases.length );

	equalityCases.forEach( function ( equalityCase ) {
		var actual = unicodeJS.charRangeArrayRegexp( equalityCase.ranges );
		assert.equal( actual, equalityCase.expected, equalityCase.message );
	} );

	throwCases.forEach( function ( throwCase ) {
		assert.throws(
			function () {
				unicodeJS.charRangeArrayRegexp( throwCase.ranges );
			},
			Error,
			'throw: ' + throwCase.message
		);
	} );
});
|
109
modules/unicodejs/test/unicodejs.wordbreak.test.js
Normal file
109
modules/unicodejs/test/unicodejs.wordbreak.test.js
Normal file
|
@ -0,0 +1,109 @@
|
|||
/*!
|
||||
* UnicodeJS Word Break module tests
|
||||
*
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
QUnit.module( 'unicodeJS.wordbreak' );

/**
 * isBreak: every element of `broken` is one expected word segment. The
 * segments are joined into a single TextString, and isBreak is checked at
 * every offset from 0 to the string length: it must report a break exactly
 * at the boundaries between segments (including both ends).
 */
QUnit.test( 'isBreak', function ( assert ) {
	var i, pos, result, context, breakOffsets, textString,
		broken = [
			'\u0300', 'xyz\'d', ' ', 'a', '\'', ' ',
			'\'', 'a', ' ', 'a', '-', 'b', ' ', '1a', '\r\n',
			'カタカナ', '3,1.2', ' ',
			'a_b_3_ナ_', ' ',
			'汉', '字', '/', '漢', '字', ' ',
			'c\u0300\u0327k', ' ',
			// Test ALetter characters above U+FFFF.
			// ALetter+ should be a single word
			// (ALetter Extend*)+ should be a single word
			//
			// We'll use:
			// U+10308 OLD ITALIC LETTER THE \ud800\udf08
			// U+1030A OLD ITALIC LETTER KA \ud800\udf0a
			// U+0302 COMBINING CIRCUMFLEX \u0302
			'\ud800\udf08' + '\ud800\udf08\u0302' + '\ud800\udf0a',
			' ',
			'\ud800\udf0a' + '\ud800\udf0a',
			' ', '뜨락또르', ' ', '트랙터', ' ', // hangul (composed)
			//// TODO: test the equivalent hangul decomposed into jamo
			//// '\u1104\u1173\u1105\u1161\u11a8\u1104\u1169\u1105\u1173 ' +
			//// '\u1110\u1173\u1105\u1162\u11a8\u1110\u1165' +
			' ', 'c\u0300\u0327', ' ', 'a', '.'
		];

	// Expected break offsets are counted in grapheme clusters, not in
	// UTF-16 code units: accumulate the cluster count of each segment.
	breakOffsets = [0];
	pos = 0;
	for ( i = 0; i < broken.length; i++ ) {
		pos += unicodeJS.graphemebreak.splitClusters( broken[i] ).length;
		breakOffsets.push( pos );
	}
	// This statement used to end in a stray comma, which silently chained
	// it to the QUnit.expect call below via the comma operator.
	textString = new unicodeJS.TextString( broken.join( '' ) );

	// One assertion per offset, end offset included
	QUnit.expect( textString.getLength() + 1 );

	for ( i = 0; i <= textString.getLength(); i++ ) {
		result = ( breakOffsets.indexOf( i ) !== -1 );
		// Up to four clusters either side of the offset, for the message
		context =
			textString.substring( Math.max( i - 4, 0 ), i ).getString() +
			'│' +
			textString.substring( i, Math.min( i + 4, textString.getLength() ) ).getString()
		;
		assert.equal(
			unicodeJS.wordbreak.isBreak( textString, i ),
			result,
			'Break at position ' + i + ' (expect ' + result + '): ' + context
		);
	}
});
|
||||
|
||||
/**
 * nextBreakOffset/prevBreakOffset: step forwards through a simple sentence
 * one break at a time, then step all the way back again, checking the
 * offset returned at every step.
 */
QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
	var textString = new unicodeJS.TextString( 'The quick brown fox' ),
		// The end values are doubled: one further step at either end of
		// the text is expected to return the same offset again.
		breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ],
		// Forward walk starts at offset 0, so skip the two leading zeros
		forwardExpected = breaks.slice( 2 ),
		// Backward walk starts at offset 19, so skip the two trailing 19s
		backwardExpected = breaks.slice( 0, breaks.length - 2 ).reverse(),
		offset = 0;

	QUnit.expect( forwardExpected.length + backwardExpected.length );

	forwardExpected.forEach( function ( expected ) {
		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset );
		assert.equal( offset, expected, 'Next break is at position ' + expected );
	} );

	backwardExpected.forEach( function ( expected ) {
		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset );
		assert.equal( offset, expected, 'Previous break is at position ' + expected );
	} );
});
|
||||
|
||||
/**
 * nextBreakOffset/prevBreakOffset with the third argument set to true:
 * walk forwards and then backwards through a sentence containing runs of
 * whitespace and punctuation, then check a few jumps from fixed offsets.
 */
QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
	var i, offset = 0,
		// The whitespace runs are significant: three leading spaces, two
		// spaces between "quick" and "brown", three trailing spaces (52
		// characters in total). Every expected offset below depends on them.
		text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
		textString = new unicodeJS.TextString( text ),
		nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
		prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];

	// The trailing 6 covers the stand-alone jump assertions at the end
	QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );

	// Forwards from offset 0
	for ( i = 0; i < nextBreaks.length; i++ ) {
		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
		assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );
	}
	// Backwards from wherever the forward walk ended
	for ( i = 0; i < prevBreaks.length; i++ ) {
		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
		assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );
	}

	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),
		12, 'Jump to end of word when starting in middle of word');
	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),
		6, 'Jump to end of word when starting at start of word');
	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),
		19, 'Jump to end of word when starting in double whitespace');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),
		14, 'Jump to start of word when starting in middle of word');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),
		3, 'Jump to start of word when starting at end of word');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),
		7, 'Jump to start of word when starting in double whitespace');
});
|
|
@ -1,5 +1,5 @@
|
|||
/*!
|
||||
* Graphemebreak module
|
||||
* UnicodeJS Graphemebreak module
|
||||
*
|
||||
* Implementation of grapheme cluster boundary detection, based on
|
||||
* Unicode UAX #29 Default Grapheme Cluster Boundary Specification; see
|
||||
|
@ -72,13 +72,13 @@
|
|||
];
|
||||
graphemeBreakRegexp = new RegExp( '(' + disjunction.join( '|' ) + ')' );
|
||||
|
||||
/**
|
||||
* Split a string into grapheme clusters.
|
||||
*
|
||||
* @param {string} text Text to split
|
||||
* @returns {string[]} Array of clusters
|
||||
*/
|
||||
graphemebreak.splitClusters = function ( text ) {
|
||||
/**
|
||||
* Split a string into grapheme clusters.
|
||||
*
|
||||
* @param {string} text Text to split
|
||||
* @returns {string[]} Array of clusters
|
||||
*/
|
||||
graphemebreak.splitClusters = function ( text ) {
|
||||
var i, parts, length, clusters = [];
|
||||
parts = text.split( graphemeBreakRegexp );
|
||||
for ( i = 0, length = parts.length; i < length; i++ ) {
|
||||
|
@ -87,5 +87,5 @@
|
|||
}
|
||||
}
|
||||
return clusters;
|
||||
};
|
||||
};
|
||||
}() );
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*!
|
||||
* Wordbreak module
|
||||
* UnicodeJS Word Break module
|
||||
*
|
||||
* Implementation of Unicode's Default Word Boundaries
|
||||
* http://www.unicode.org/reports/tr29/#Default_Word_Boundaries
|
||||
|
|
|
@ -1,255 +0,0 @@
|
|||
/*!
|
||||
* Wordbreak module tests
|
||||
*
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
QUnit.module( 'unicodeJS.wordbreak' );

/**
 * splitClusters: joining a list of known grapheme clusters into one string
 * and splitting that string again must give back exactly the same list.
 */
QUnit.test( 'splitClusters', 1, function ( assert ) {
	var clusters, joined, actual;

	clusters = [
		// ASCII letters and spaces
		'a', ' ', ' ', 'b',
		// Katakana, one cluster per character
		'カ', 'タ', 'カ', 'ナ',
		// c followed by two combining characters
		'c\u0300\u0327',
		// U+10308 OLD ITALIC LETTER THE (written as a surrogate pair)
		'\ud800\udf08',
		// U+10308 followed by a combining circumflex
		'\ud800\udf08\u0302',
		// CR LF is expected to stay together; the lone LF is its own cluster
		'\r\n',
		'\n',
		// Hangul jamo L+V
		'\u1104\u1173',
		// Hangul jamo L+V+T
		'\u1105\u1161\u11a8',
		// Two regional indicator characters
		'\ud83c\udded\ud83c\uddf0'
	];
	joined = clusters.join( '' );
	actual = unicodeJS.graphemebreak.splitClusters( joined );

	assert.deepEqual( actual, clusters, 'Split clusters' );
});
|
||||
|
||||
/**
 * charRangeArrayRegexp: check the regexp source produced for a list of code
 * points / code point ranges, and that ranges touching the surrogate block
 * are rejected with an Error.
 */
QUnit.test( 'charRangeArrayRegexp', function ( assert ) {
	var equalityCases, throwCases;

	// ranges: argument passed to charRangeArrayRegexp
	// expected: exact string the call must return
	// message: assertion message
	equalityCases = [
		{
			ranges: [0x0040],
			expected: '\\u0040',
			message: 'single BMP character'
		},
		{
			ranges: [0xFFFF],
			expected: '\\uffff',
			message: 'highest BMP character'
		},
		{
			ranges: [0x005F, [0x203F, 0x2040], 0x2054, [0xFE33, 0xFE34],
				[0xFE4D, 0xFE4F], 0xFF3F],
			expected: '[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]',
			message: 'multiple BMP ranges (= ExtendNumLet from wordbreak rules)'
		},
		{
			ranges: [0xD7FF],
			expected: '\\ud7ff',
			message: 'just below surrogate range'
		},
		{
			ranges: [0xE000],
			expected: '\\ue000',
			message: 'just above surrogate range'
		},
		{
			ranges: [0x10000],
			expected: '\\ud800\\udc00',
			message: 'lowest non-BMP character'
		},
		{
			ranges: [0x10001],
			expected: '\\ud800\\udc01',
			message: 'second-lowest non-BMP character'
		},
		{
			ranges: [0x103FF],
			expected: '\\ud800\\udfff',
			message: 'highest character with D800 leading surrogate'
		},
		{
			ranges: [0x10400],
			expected: '\\ud801\\udc00',
			message: 'lowest character with D801 leading surrogate'
		},
		{
			ranges: [[0xFF00, 0xFFFF]],
			expected: '[\\uff00-\\uffff]',
			message: 'single range at top of BMP'
		},
		{
			ranges: [[0xFF00, 0x10000]],
			expected: '[\\uff00-\\uffff]|\\ud800\\udc00',
			message: 'single range spanning BMP and non-BMP'
		},
		{
			ranges: [0xFFFF, 0x10000, 0x10002],
			// TODO: could compact
			expected: '\\uffff|\\ud800\\udc00|\\ud800\\udc02',
			message: 'single characters, both BMP and non-BMP'
		},
		{
			ranges: [[0x0300, 0x0400], 0x10FFFF],
			expected: '[\\u0300-\\u0400]|\\udbff\\udfff',
			message: 'BMP range and non-BMP character'
		},
		{
			ranges: [[0xFF00, 0x103FF]],
			expected: '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]',
			message: 'range to top of D800 leading surrogate range'
		},
		{
			ranges: [[0xFF00, 0x10400]],
			expected: '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00',
			message: 'range to start of D801 leading surrogate range'
		},
		{
			ranges: [[0xFF00, 0x10401]],
			expected: '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]',
			message: 'range past start of D801 leading surrogate range'
		},
		{
			ranges: [[0xFF00, 0x15555]],
			expected: '[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]',
			message: 'range spanning multiple leading surrogate ranges'
		},
		{
			ranges: [[0x10454, 0x10997]],
			expected: '\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]',
			message: 'range starting within one leading surrogate range, and ending in the next'
		},
		{
			ranges: [[0x20222, 0x29999]],
			expected: '\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]',
			message: 'range starting within one leading surrogate range, and ending in a distant one'
		},
		{
			ranges: [0x00AD, [0x0600, 0x0604], 0x06DD, 0x070F,
				[0x200E, 0x200F], [0x202A, 0x202E], [0x2060, 0x2064],
				[0x206A, 0x206F], 0xFEFF, [0xFFF9, 0xFFFB],
				0x110BD, [0x1D173, 0x1D17A],
				0xE0001, [0xE0020, 0xE007F]],
			// TODO: could compact
			expected: '[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' +
				'\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' +
				'\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' +
				'|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' +
				'|\\udb40[\\udc20-\\udc7f]',
			message: 'multiple BMP and non-BMP ranges (= Format from wordbreak rules)'
		},
		{
			ranges: [[0x0, 0xD7FF], [0xE000, 0xFFFF], [0x10000, 0x10FFFF]],
			expected: '[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]',
			message: 'largest possible range'
		}
	];

	// Inputs that include or overlap surrogate code points; each must throw
	throwCases = [
		{ ranges: [0xD800], message: 'surrogate character U+D800' },
		{ ranges: [0xDFFF], message: 'surrogate character U+DFFF' },
		{ ranges: [[0xCCCC, 0xDDDD]], message: 'surrogate overlap 1' },
		{ ranges: [[0xDDDD, 0xEEEE]], message: 'surrogate overlap 2' },
		{ ranges: [[0xDDDD, 0xEEEEE]], message: 'surrogate overlap 3' },
		{ ranges: [[0xCCCC, 0xEEEE]], message: 'surrogate overlap 4' }
	];

	QUnit.expect( equalityCases.length + throwCases.length );

	equalityCases.forEach( function ( equalityCase ) {
		var actual = unicodeJS.charRangeArrayRegexp( equalityCase.ranges );
		assert.equal( actual, equalityCase.expected, equalityCase.message );
	} );

	throwCases.forEach( function ( throwCase ) {
		assert.throws(
			function () {
				unicodeJS.charRangeArrayRegexp( throwCase.ranges );
			},
			Error,
			'throw: ' + throwCase.message
		);
	} );
});
|
||||
|
||||
/**
 * isBreak: every element of `broken` is one expected word segment. The
 * segments are joined into a single TextString, and isBreak is checked at
 * every offset from 0 to the string length: it must report a break exactly
 * at the boundaries between segments (including both ends).
 */
QUnit.test( 'isBreak', function ( assert ) {
	var i, pos, result, context, breakOffsets, textString,
		broken = [
			'\u0300', 'xyz\'d', ' ', 'a', '\'', ' ',
			'\'', 'a', ' ', 'a', '-', 'b', ' ', '1a', '\r\n',
			'カタカナ', '3,1.2', ' ',
			'a_b_3_ナ_', ' ',
			'汉', '字', '/', '漢', '字', ' ',
			'c\u0300\u0327k', ' ',
			// Test ALetter characters above U+FFFF.
			// ALetter+ should be a single word
			// (ALetter Extend*)+ should be a single word
			//
			// We'll use:
			// U+10308 OLD ITALIC LETTER THE \ud800\udf08
			// U+1030A OLD ITALIC LETTER KA \ud800\udf0a
			// U+0302 COMBINING CIRCUMFLEX \u0302
			'\ud800\udf08' + '\ud800\udf08\u0302' + '\ud800\udf0a',
			' ',
			'\ud800\udf0a' + '\ud800\udf0a',
			' ', '뜨락또르', ' ', '트랙터', ' ', // hangul (composed)
			//// TODO: test the equivalent hangul decomposed into jamo
			//// '\u1104\u1173\u1105\u1161\u11a8\u1104\u1169\u1105\u1173 ' +
			//// '\u1110\u1173\u1105\u1162\u11a8\u1110\u1165' +
			' ', 'c\u0300\u0327', ' ', 'a', '.'
		];

	// Expected break offsets are counted in grapheme clusters, not in
	// UTF-16 code units: accumulate the cluster count of each segment.
	breakOffsets = [0];
	pos = 0;
	for ( i = 0; i < broken.length; i++ ) {
		pos += unicodeJS.graphemebreak.splitClusters( broken[i] ).length;
		breakOffsets.push( pos );
	}
	// This statement used to end in a stray comma, which silently chained
	// it to the QUnit.expect call below via the comma operator.
	textString = new unicodeJS.TextString( broken.join( '' ) );

	// One assertion per offset, end offset included
	QUnit.expect( textString.getLength() + 1 );

	for ( i = 0; i <= textString.getLength(); i++ ) {
		result = ( breakOffsets.indexOf( i ) !== -1 );
		// Up to four clusters either side of the offset, for the message
		context =
			textString.substring( Math.max( i - 4, 0 ), i ).getString() +
			'│' +
			textString.substring( i, Math.min( i + 4, textString.getLength() ) ).getString()
		;
		assert.equal(
			unicodeJS.wordbreak.isBreak( textString, i ),
			result,
			'Break at position ' + i + ' (expect ' + result + '): ' + context
		);
	}
});
|
||||
|
||||
/**
 * nextBreakOffset/prevBreakOffset: step forwards through a simple sentence
 * one break at a time, then step all the way back again, checking the
 * offset returned at every step.
 */
QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
	var textString = new unicodeJS.TextString( 'The quick brown fox' ),
		// The end values are doubled: one further step at either end of
		// the text is expected to return the same offset again.
		breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ],
		// Forward walk starts at offset 0, so skip the two leading zeros
		forwardExpected = breaks.slice( 2 ),
		// Backward walk starts at offset 19, so skip the two trailing 19s
		backwardExpected = breaks.slice( 0, breaks.length - 2 ).reverse(),
		offset = 0;

	QUnit.expect( forwardExpected.length + backwardExpected.length );

	forwardExpected.forEach( function ( expected ) {
		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset );
		assert.equal( offset, expected, 'Next break is at position ' + expected );
	} );

	backwardExpected.forEach( function ( expected ) {
		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset );
		assert.equal( offset, expected, 'Previous break is at position ' + expected );
	} );
});
|
||||
|
||||
/**
 * nextBreakOffset/prevBreakOffset with the third argument set to true:
 * walk forwards and then backwards through a sentence containing runs of
 * whitespace and punctuation, then check a few jumps from fixed offsets.
 */
QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
	var i, offset = 0,
		// The whitespace runs are significant: three leading spaces, two
		// spaces between "quick" and "brown", three trailing spaces (52
		// characters in total). Every expected offset below depends on them.
		text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',
		textString = new unicodeJS.TextString( text ),
		nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
		prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];

	// The trailing 6 covers the stand-alone jump assertions at the end
	QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );

	// Forwards from offset 0
	for ( i = 0; i < nextBreaks.length; i++ ) {
		offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );
		assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );
	}
	// Backwards from wherever the forward walk ended
	for ( i = 0; i < prevBreaks.length; i++ ) {
		offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );
		assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );
	}

	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),
		12, 'Jump to end of word when starting in middle of word');
	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),
		6, 'Jump to end of word when starting at start of word');
	assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),
		19, 'Jump to end of word when starting in double whitespace');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),
		14, 'Jump to start of word when starting in middle of word');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),
		3, 'Jump to start of word when starting at end of word');
	assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),
		7, 'Jump to start of word when starting in double whitespace');
});
|
Loading…
Reference in a new issue