From 6d999d8504544ee48ebc50250419df6243bf57e5 Mon Sep 17 00:00:00 2001 From: Ed Sanders Date: Fri, 5 Jul 2013 14:15:08 +0200 Subject: [PATCH] Separate out UnicodeJS tests properly Also fix some comment & whitespace inconsistencies. Change-Id: I71717643678445590820e174e6ed2e5ac58103c2 --- modules/unicodejs/index.php | 4 +- .../test/unicodejs.graphemebreak.test.js | 34 +++ modules/unicodejs/test/unicodejs.test.js | 128 +++++++++ .../test/unicodejs.wordbreak.test.js | 109 ++++++++ modules/unicodejs/unicodejs.graphemebreak.js | 18 +- modules/unicodejs/unicodejs.wordbreak.js | 2 +- modules/unicodejs/unicodejs.wordbreak.test.js | 255 ------------------ 7 files changed, 284 insertions(+), 266 deletions(-) create mode 100644 modules/unicodejs/test/unicodejs.graphemebreak.test.js create mode 100644 modules/unicodejs/test/unicodejs.test.js create mode 100644 modules/unicodejs/test/unicodejs.wordbreak.test.js delete mode 100644 modules/unicodejs/unicodejs.wordbreak.test.js diff --git a/modules/unicodejs/index.php b/modules/unicodejs/index.php index 121764e870..b6d3fd98c6 100644 --- a/modules/unicodejs/index.php +++ b/modules/unicodejs/index.php @@ -26,7 +26,9 @@ - + + +
diff --git a/modules/unicodejs/test/unicodejs.graphemebreak.test.js b/modules/unicodejs/test/unicodejs.graphemebreak.test.js new file mode 100644 index 0000000000..f4cb50c17b --- /dev/null +++ b/modules/unicodejs/test/unicodejs.graphemebreak.test.js @@ -0,0 +1,34 @@ +/*! + * UnicodeJS Grapheme Break module tests + * + * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt + * @license The MIT License (MIT); see LICENSE.txt + */ + +QUnit.module( 'unicodeJS.graphemebreak' ); + +QUnit.test( 'splitClusters', 1, function ( assert ) { + var expected = [ + 'a', + ' ', + ' ', + 'b', + 'カ', + 'タ', + 'カ', + 'ナ', + 'c\u0300\u0327', // c with two combining chars + '\ud800\udf08', // U+10308 OLD ITALIC LETTER THE + '\ud800\udf08\u0302', // U+10308 + combining circumflex + '\r\n', + '\n', + '\u1104\u1173', // jamo L+V + '\u1105\u1161\u11a8', // jamo L+V+T + '\ud83c\udded\ud83c\uddf0' // 2*regional indicator characters + ]; + assert.deepEqual( + unicodeJS.graphemebreak.splitClusters( expected.join( '' ) ), + expected, + 'Split clusters' + ); +}); diff --git a/modules/unicodejs/test/unicodejs.test.js b/modules/unicodejs/test/unicodejs.test.js new file mode 100644 index 0000000000..f315185522 --- /dev/null +++ b/modules/unicodejs/test/unicodejs.test.js @@ -0,0 +1,128 @@ +/*! + * UnicodeJS Base module tests + * + * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt + * @license The MIT License (MIT); see LICENSE.txt + */ + +QUnit.module( 'unicodeJS' ); + +QUnit.test( 'charRangeArrayRegexp', function ( assert ) { + var i, test, doTestFunc, equalityTests, throwTests; + + equalityTests = [ + [[0x0040], '\\u0040', 'single BMP character'], + [[0xFFFF], '\\uffff', 'highest BMP character'], + [ + [0x005F, [0x203F, 0x2040], 0x2054, [0xFE33, 0xFE34], + [0xFE4D, 0xFE4F], 0xFF3F], + '[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]', + 'multiple BMP ranges (= ExtendNumLet from wordbreak rules)' + ], + [[0xD7FF], '\\ud7ff', 'just below surrogate range'], + [[0xE000], '\\ue000', 'just above surrogate range'], + [[0x10000], '\\ud800\\udc00', 'lowest non-BMP character'], + [[0x10001], '\\ud800\\udc01', 'second-lowest non-BMP character'], + [[0x103FF], '\\ud800\\udfff', 'highest character with D800 leading surrogate'], + [[0x10400], '\\ud801\\udc00', 'lowest character with D801 leading surrogate'], + [ + [[0xFF00, 0xFFFF]], + '[\\uff00-\\uffff]', + 'single range at top of BMP' + ], + [ + [[0xFF00, 0x10000]], + '[\\uff00-\\uffff]|\\ud800\\udc00', + 'single range spanning BMP and non-BMP' + ], + [ + [0xFFFF, 0x10000, 0x10002], + '\\uffff|\\ud800\\udc00|\\ud800\\udc02', // TODO: could compact + 'single characters, both BMP and non-BMP' + ], + [ + [[0x0300, 0x0400], 0x10FFFF], + '[\\u0300-\\u0400]|\\udbff\\udfff', + 'BMP range and non-BMP character' + ], + [ + [[0xFF00, 0x103FF]], + '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]', + 'range to top of D800 leading surrogate range' + ], + [ + [[0xFF00, 0x10400]], + '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00', + 'range to start of D801 leading surrogate range' + ], + [ + [[0xFF00, 0x10401]], + '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]', + 'range past start of D801 leading surrogate range' + ], + [ + [[0xFF00, 0x15555]], + '[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]', + 'range spanning multiple leading surrogate ranges' + ], + [ + [[0x10454, 0x10997]], + '\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]', + 'range starting within one leading surrogate range, and ending in the next' + ], + [ + [[0x20222, 0x29999]], + '\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]', + 'range starting within one leading surrogate range, and ending in a distant one' + ], + [ + [0x00AD, [0x0600, 0x0604], 0x06DD, 0x070F, + [0x200E, 0x200F], [0x202A, 0x202E], [0x2060, 0x2064], + [0x206A, 0x206F], 0xFEFF, [0xFFF9, 0xFFFB], + 0x110BD, [0x1D173, 0x1D17A], + 0xE0001, [0xE0020, 0xE007F]], + // TODO: could compact + '[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' + + '\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' + + '\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' + + '|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' + + '|\\udb40[\\udc20-\\udc7f]', + 'multiple BMP and non-BMP ranges (= Format from wordbreak rules)' + ], + [ + [[0x0, 0xD7FF], [0xE000, 0xFFFF], [0x10000, 0x10FFFF]], + '[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]', + 'largest possible range' + ] + ]; + throwTests = [ + [[0xD800], 'surrogate character U+D800'], + [[0xDFFF], 'surrogate character U+DFFF'], + [[[0xCCCC, 0xDDDD]], 'surrogate overlap 1'], + [[[0xDDDD, 0xEEEE]], 'surrogate overlap 2'], + [[[0xDDDD, 0xEEEEE]], 'surrogate overlap 3'], + [[[0xCCCC, 0xEEEE]], 'surrogate overlap 4'] + ]; + + QUnit.expect( equalityTests.length + throwTests.length ); + for ( i = 0; i < equalityTests.length; i++ ) { + test = equalityTests[i]; + assert.equal( + unicodeJS.charRangeArrayRegexp( test[0] ), + test[1], + test[2] + ); + } + for ( i = 0; i < throwTests.length; i++ ) { + /*jshint loopfunc:true */ + test = throwTests[i]; + doTestFunc = function () { + unicodeJS.charRangeArrayRegexp( test[0] ); + }; + assert.throws( + doTestFunc, + Error, + 'throw: ' + test[1] + ); + } +}); diff --git a/modules/unicodejs/test/unicodejs.wordbreak.test.js b/modules/unicodejs/test/unicodejs.wordbreak.test.js new file mode 100644 index 0000000000..bb6696a50b --- /dev/null +++ b/modules/unicodejs/test/unicodejs.wordbreak.test.js @@ -0,0 +1,109 @@ +/*! + * UnicodeJS Word Break module tests + * + * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt + * @license The MIT License (MIT); see LICENSE.txt + */ + +QUnit.module( 'unicodeJS.wordbreak' ); + +QUnit.test( 'isBreak', function ( assert ) { + var i, pos, result, context, breakOffsets, textString, + broken = [ + '\u0300', 'xyz\'d', ' ', 'a', '\'', ' ', + '\'', 'a', ' ', 'a', '-', 'b', ' ', '1a', '\r\n', + 'カタカナ', '3,1.2', ' ', + 'a_b_3_ナ_', ' ', + '汉', '字', '/', '漢', '字', ' ', + 'c\u0300\u0327k', ' ', + // Test ALetter characters above U+FFFF. + // ALetter+ should be a single word + // (ALetter Extend*)+ should be a single word + // + // We'll use: + // U+10308 OLD ITALIC LETTER THE \ud800\udf08 + // U+1030A OLD ITALIC LETTER KA \ud800\udf0a + // U+0302 COMBINING CIRCUMFLEX \u0302 + '\ud800\udf08' + '\ud800\udf08\u0302' + '\ud800\udf0a', + ' ', + '\ud800\udf0a' + '\ud800\udf0a', + ' ', '뜨락또르', ' ', '트랙터', ' ', // hangul (composed) + //// TODO: test the equivalent hangul decomposed into jamo + //// '\u1104\u1173\u1105\u1161\u11a8\u1104\u1169\u1105\u1173 ' + + //// '\u1110\u1173\u1105\u1162\u11a8\u1110\u1165' + + ' ', 'c\u0300\u0327', ' ', 'a', '.' + ]; + breakOffsets = [0]; + pos = 0; + for ( i = 0; i < broken.length; i++ ) { + pos += unicodeJS.graphemebreak.splitClusters( broken[i] ).length; + breakOffsets.push( pos ); + } + textString = new unicodeJS.TextString( broken.join( '' ) ), + + QUnit.expect( textString.getLength() + 1 ); + + for ( i = 0; i <= textString.getLength(); i++ ) { + result = ( breakOffsets.indexOf( i ) !== -1 ); + context = + textString.substring( Math.max( i - 4, 0 ), i ).getString() + + '│' + + textString.substring( i, Math.min( i + 4, textString.getLength() ) ).getString() + ; + assert.equal( + unicodeJS.wordbreak.isBreak( textString, i ), + result, + 'Break at position ' + i + ' (expect ' + result + '): ' + context + ); + } +}); + +QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) { + var i, offset = 0, + text = 'The quick brown fox', + textString = new unicodeJS.TextString( text ), + breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ]; + + QUnit.expect( 2*(breaks.length - 2) ); + + for ( i = 2; i < breaks.length; i++ ) { + offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset ); + assert.equal( offset, breaks[i], 'Next break is at position ' + breaks[i] ); + } + for ( i = breaks.length - 3; i >= 0; i-- ) { + offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset ); + assert.equal( offset, breaks[i], 'Previous break is at position ' + breaks[i] ); + } +}); + +QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) { + var i, offset = 0, + text = ' The quick brown ..fox jumps... 3.14159 すどくスドク ', + textString = new unicodeJS.TextString( text ), + nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ], + prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ]; + + QUnit.expect( nextBreaks.length + prevBreaks.length + 6 ); + + for ( i = 0; i < nextBreaks.length; i++ ) { + offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true ); + assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] ); + } + for ( i = 0; i < prevBreaks.length; i++ ) { + offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true ); + assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] ); + } + + assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ), + 12, 'Jump to end of word when starting in middle of word'); + assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ), + 6, 'Jump to end of word when starting at start of word'); + assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ), + 19, 'Jump to end of word when starting in double whitespace'); + assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ), + 14, 'Jump to start of word when starting in middle of word'); + assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ), + 3, 'Jump to start of word when starting at end of word'); + assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ), + 7, 'Jump to start of word when starting in double whitespace'); +}); diff --git a/modules/unicodejs/unicodejs.graphemebreak.js b/modules/unicodejs/unicodejs.graphemebreak.js index abb86f58c4..9774730e2a 100644 --- a/modules/unicodejs/unicodejs.graphemebreak.js +++ b/modules/unicodejs/unicodejs.graphemebreak.js @@ -1,5 +1,5 @@ /*! - * Graphemebreak module + * UnicodeJS Graphemebreak module * * Implementation of grapheme cluster boundary detection, based on * Unicode UAX #29 Default Grapheme Cluster Boundary Specification; see @@ -72,13 +72,13 @@ ]; graphemeBreakRegexp = new RegExp( '(' + disjunction.join( '|' ) + ')' ); - /** - * Split a string into grapheme clusters. - * - * @param {string} text Text to split - * @returns {string[]} Array of clusters - */ - graphemebreak.splitClusters = function ( text ) { + /** + * Split a string into grapheme clusters. + * + * @param {string} text Text to split + * @returns {string[]} Array of clusters + */ + graphemebreak.splitClusters = function ( text ) { var i, parts, length, clusters = []; parts = text.split( graphemeBreakRegexp ); for ( i = 0, length = parts.length; i < length; i++ ) { @@ -87,5 +87,5 @@ } } return clusters; - }; + }; }() ); diff --git a/modules/unicodejs/unicodejs.wordbreak.js b/modules/unicodejs/unicodejs.wordbreak.js index 7ab56c3195..dc64cde820 100644 --- a/modules/unicodejs/unicodejs.wordbreak.js +++ b/modules/unicodejs/unicodejs.wordbreak.js @@ -1,5 +1,5 @@ /*! - * Wordbreak module + * UnicodeJS Word Break module * * Implementation of Unicode's Default Word Boundaries * http://www.unicode.org/reports/tr29/#Default_Word_Boundaries diff --git a/modules/unicodejs/unicodejs.wordbreak.test.js b/modules/unicodejs/unicodejs.wordbreak.test.js deleted file mode 100644 index 8501dce81e..0000000000 --- a/modules/unicodejs/unicodejs.wordbreak.test.js +++ /dev/null @@ -1,255 +0,0 @@ -/*! - * Wordbreak module tests - * - * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt - * @license The MIT License (MIT); see LICENSE.txt - */ - -QUnit.module( 'unicodeJS.wordbreak' ); - -QUnit.test( 'splitClusters', 1, function ( assert ) { - var expected = [ - 'a', - ' ', - ' ', - 'b', - 'カ', - 'タ', - 'カ', - 'ナ', - 'c\u0300\u0327', // c with two combining chars - '\ud800\udf08', // U+10308 OLD ITALIC LETTER THE - '\ud800\udf08\u0302', // U+10308 + combining circumflex - '\r\n', - '\n', - '\u1104\u1173', // jamo L+V - '\u1105\u1161\u11a8', // jamo L+V+T - '\ud83c\udded\ud83c\uddf0' // 2*regional indicator characters - ]; - assert.deepEqual( - unicodeJS.graphemebreak.splitClusters( expected.join( '' ) ), - expected, - 'Split clusters' - ); -}); - -QUnit.test( 'charRangeArrayRegexp', function ( assert ) { - var i, test, doTestFunc, equalityTests, throwTests; - - equalityTests = [ - [[0x0040], '\\u0040', 'single BMP character'], - [[0xFFFF], '\\uffff', 'highest BMP character'], - [ - [0x005F, [0x203F, 0x2040], 0x2054, [0xFE33, 0xFE34], - [0xFE4D, 0xFE4F], 0xFF3F], - '[\\u005f\\u203f-\\u2040\\u2054\\ufe33-\\ufe34\\ufe4d-\\ufe4f\\uff3f]', - 'multiple BMP ranges (= ExtendNumLet from wordbreak rules)' - ], - [[0xD7FF], '\\ud7ff', 'just below surrogate range'], - [[0xE000], '\\ue000', 'just above surrogate range'], - [[0x10000], '\\ud800\\udc00', 'lowest non-BMP character'], - [[0x10001], '\\ud800\\udc01', 'second-lowest non-BMP character'], - [[0x103FF], '\\ud800\\udfff', 'highest character with D800 leading surrogate'], - [[0x10400], '\\ud801\\udc00', 'lowest character with D801 leading surrogate'], - [ - [[0xFF00, 0xFFFF]], - '[\\uff00-\\uffff]', - 'single range at top of BMP' - ], - [ - [[0xFF00, 0x10000]], - '[\\uff00-\\uffff]|\\ud800\\udc00', - 'single range spanning BMP and non-BMP' - ], - [ - [0xFFFF, 0x10000, 0x10002], - '\\uffff|\\ud800\\udc00|\\ud800\\udc02', // TODO: could compact - 'single characters, both BMP and non-BMP' - ], - [ - [[0x0300, 0x0400], 0x10FFFF], - '[\\u0300-\\u0400]|\\udbff\\udfff', - 'BMP range and non-BMP character' - ], - [ - [[0xFF00, 0x103FF]], - '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]', - 'range to top of D800 leading surrogate range' - ], - [ - [[0xFF00, 0x10400]], - '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801\\udc00', - 'range to start of D801 leading surrogate range' - ], - [ - [[0xFF00, 0x10401]], - '[\\uff00-\\uffff]|\\ud800[\\udc00-\\udfff]|\\ud801[\\udc00-\\udc01]', - 'range past start of D801 leading surrogate range' - ], - [ - [[0xFF00, 0x15555]], - '[\\uff00-\\uffff]|[\\ud800-\\ud814][\\udc00-\\udfff]|\\ud815[\\udc00-\\udd55]', - 'range spanning multiple leading surrogate ranges' - ], - [ - [[0x10454, 0x10997]], - '\\ud801[\\udc54-\\udfff]|\\ud802[\\udc00-\\udd97]', - 'range starting within one leading surrogate range, and ending in the next' - ], - [ - [[0x20222, 0x29999]], - '\\ud840[\\ude22-\\udfff]|[\\ud841-\\ud865][\\udc00-\\udfff]|\\ud866[\\udc00-\\udd99]', - 'range starting within one leading surrogate range, and ending in a distant one' - ], - [ - [0x00AD, [0x0600, 0x0604], 0x06DD, 0x070F, - [0x200E, 0x200F], [0x202A, 0x202E], [0x2060, 0x2064], - [0x206A, 0x206F], 0xFEFF, [0xFFF9, 0xFFFB], - 0x110BD, [0x1D173, 0x1D17A], - 0xE0001, [0xE0020, 0xE007F]], - // TODO: could compact - '[\\u00ad\\u0600-\\u0604\\u06dd\\u070f' + - '\\u200e-\\u200f\\u202a-\\u202e\\u2060-\\u2064' + - '\\u206a-\\u206f\\ufeff\\ufff9-\\ufffb]' + - '|\\ud804\\udcbd|\\ud834[\\udd73-\\udd7a]|\\udb40\\udc01' + - '|\\udb40[\\udc20-\\udc7f]', - 'multiple BMP and non-BMP ranges (= Format from wordbreak rules)' - ], - [ - [[0x0, 0xD7FF], [0xE000, 0xFFFF], [0x10000, 0x10FFFF]], - '[\\u0000-\\ud7ff\\ue000-\\uffff]|[\\ud800-\\udbff][\\udc00-\\udfff]', - 'largest possible range' - ] - ]; - throwTests = [ - [[0xD800], 'surrogate character U+D800'], - [[0xDFFF], 'surrogate character U+DFFF'], - [[[0xCCCC, 0xDDDD]], 'surrogate overlap 1'], - [[[0xDDDD, 0xEEEE]], 'surrogate overlap 2'], - [[[0xDDDD, 0xEEEEE]], 'surrogate overlap 3'], - [[[0xCCCC, 0xEEEE]], 'surrogate overlap 4'] - ]; - - QUnit.expect( equalityTests.length + throwTests.length ); - for ( i = 0; i < equalityTests.length; i++ ) { - test = equalityTests[i]; - assert.equal( - unicodeJS.charRangeArrayRegexp( test[0] ), - test[1], - test[2] - ); - } - for ( i = 0; i < throwTests.length; i++ ) { - /*jshint loopfunc:true */ - test = throwTests[i]; - doTestFunc = function () { - unicodeJS.charRangeArrayRegexp( test[0] ); - }; - assert.throws( - doTestFunc, - Error, - 'throw: ' + test[1] - ); - } -}); - -QUnit.test( 'isBreak', function ( assert ) { - var i, pos, result, context, breakOffsets, textString, - broken = [ - '\u0300', 'xyz\'d', ' ', 'a', '\'', ' ', - '\'', 'a', ' ', 'a', '-', 'b', ' ', '1a', '\r\n', - 'カタカナ', '3,1.2', ' ', - 'a_b_3_ナ_', ' ', - '汉', '字', '/', '漢', '字', ' ', - 'c\u0300\u0327k', ' ', - // Test ALetter characters above U+FFFF. - // ALetter+ should be a single word - // (ALetter Extend*)+ should be a single word - // - // We'll use: - // U+10308 OLD ITALIC LETTER THE \ud800\udf08 - // U+1030A OLD ITALIC LETTER KA \ud800\udf0a - // U+0302 COMBINING CIRCUMFLEX \u0302 - '\ud800\udf08' + '\ud800\udf08\u0302' + '\ud800\udf0a', - ' ', - '\ud800\udf0a' + '\ud800\udf0a', - ' ', '뜨락또르', ' ', '트랙터', ' ', // hangul (composed) - //// TODO: test the equivalent hangul decomposed into jamo - //// '\u1104\u1173\u1105\u1161\u11a8\u1104\u1169\u1105\u1173 ' + - //// '\u1110\u1173\u1105\u1162\u11a8\u1110\u1165' + - ' ', 'c\u0300\u0327', ' ', 'a', '.' - ]; - breakOffsets = [0]; - pos = 0; - for ( i = 0; i < broken.length; i++ ) { - pos += unicodeJS.graphemebreak.splitClusters( broken[i] ).length; - breakOffsets.push( pos ); - } - textString = new unicodeJS.TextString( broken.join( '' ) ), - - QUnit.expect( textString.getLength() + 1 ); - - for ( i = 0; i <= textString.getLength(); i++ ) { - result = ( breakOffsets.indexOf( i ) !== -1 ); - context = - textString.substring( Math.max( i - 4, 0 ), i ).getString() + - '│' + - textString.substring( i, Math.min( i + 4, textString.getLength() ) ).getString() - ; - assert.equal( - unicodeJS.wordbreak.isBreak( textString, i ), - result, - 'Break at position ' + i + ' (expect ' + result + '): ' + context - ); - } -}); - -QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) { - var i, offset = 0, - text = 'The quick brown fox', - textString = new unicodeJS.TextString( text ), - breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ]; - - QUnit.expect( 2*(breaks.length - 2) ); - - for ( i = 2; i < breaks.length; i++ ) { - offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset ); - assert.equal( offset, breaks[i], 'Next break is at position ' + breaks[i] ); - } - for ( i = breaks.length - 3; i >= 0; i-- ) { - offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset ); - assert.equal( offset, breaks[i], 'Previous break is at position ' + breaks[i] ); - } -}); - -QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) { - var i, offset = 0, - text = ' The quick brown ..fox jumps... 3.14159 すどくスドク ', - textString = new unicodeJS.TextString( text ), - nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ], - prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ]; - - QUnit.expect( nextBreaks.length + prevBreaks.length + 6 ); - - for ( i = 0; i < nextBreaks.length; i++ ) { - offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true ); - assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] ); - } - for ( i = 0; i < prevBreaks.length; i++ ) { - offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true ); - assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] ); - } - - assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ), - 12, 'Jump to end of word when starting in middle of word'); - assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ), - 6, 'Jump to end of word when starting at start of word'); - assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ), - 19, 'Jump to end of word when starting in double whitespace'); - assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ), - 14, 'Jump to start of word when starting in middle of word'); - assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ), - 3, 'Jump to start of word when starting at end of word'); - assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ), - 7, 'Jump to start of word when starting in double whitespace'); -});