mediawiki-extensions-Visual.../modules/unicodejs/unicodejs.wordbreak.test.js

/*!
 * Wordbreak module tests
 *
 * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */

// Group the tests in this file under the 'unicodeJS.wordbreak' QUnit module.
QUnit.module( 'unicodeJS.wordbreak' );

/**
 * Check unicodeJS.wordbreak.isBreak at every offset of a mixed-script sample.
 *
 * Positions are TextString offsets: the loop runs from 0 to
 * textString.getLength() inclusive, and `breaks` lists the offsets at which
 * isBreak is expected to report a break. Every other offset is expected to
 * report no break.
 */
QUnit.test( 'isBreak', function ( assert ) {
	var i, result, context, length,
		text =
			/*jshint quotmark:double */
			// 0 - 10
			"\u0300xyz'd a' " +
			// 10 - 20
			"'a a-b 1a\r" +
			// 20 - 30
			"\nカタカナ3,1.2" +
			// 30 - 40
			" a_b_3_ナ_ " +
			// 40 - 50
			"汉字/漢字 c\u0300\u0327k  " +
			// 50 - 60
			"\ud800\udf08" + // U+10308 OLD ITALIC LETTER THE
			"\ud800\udf08\u0302" + // U+10308 OLD ITALIC LETTER THE + combining circumflex
			"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA
			" pad " +
			"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA
			"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA
			// 60 - 65: "a." tests end of para
			" c\u0300\u0327 a.",
			/*jshint quotmark:single */
		textString = new unicodeJS.TextString( text ),
		breaks = [
			0, 1, 6, 7, 8, 9, 10,
			11, 12, 13, 14, 15, 16, 17, 19,
			21, 25, 30,
			31, 39, 40,
			41, 42, 43, 44, 45, 46, 48, 49, 50,
			51, 52, // TODO: these should not be in the list
			53, 54, 57, 58,
			59, // TODO: this should not be in the list
			60,
			61, 62, 63, 64, 65
		];

	// Read the TextString length once; it bounds the loop, the assertion
	// count and the context window below.
	length = textString.getLength();

	// One assertion per offset, including the offset just past the last unit.
	QUnit.expect( length + 1 );

	for ( i = 0; i <= length; i++ ) {
		result = ( breaks.indexOf( i ) !== -1 );
		// Up to four units either side of the offset, for the failure message.
		// Clamp with the TextString length rather than text.length: the raw
		// string counts UTF-16 code units, which is a different unit from the
		// offsets used everywhere else in this test.
		context =
			textString.substring( Math.max( i - 4, 0 ), i ).getString() +
			'│' +
			textString.substring( i, Math.min( i + 4, length ) ).getString()
		;
		assert.equal(
			unicodeJS.wordbreak.isBreak( textString, i ),
			result,
			'Position ' + i + ' is ' + ( result ? '' : 'not ' ) + 'a break: ' + context
		);
	}
});

/**
 * Step forwards through a plain sentence with nextBreakOffset, then back
 * again with prevBreakOffset, comparing each returned offset against the
 * expected list.
 */
QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {
	var step,
		position = 0,
		sentence = new unicodeJS.TextString( 'The quick brown fox' ),
		// The doubled first and last entries are the results expected when
		// stepping again from either end of the text.
		expected = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ],
		stepCount = expected.length - 2;

	// The same number of steps is taken in each direction.
	QUnit.expect( 2 * stepCount );

	// Forwards: compare against expected[2] through the last entry.
	for ( step = 0; step < stepCount; step++ ) {
		position = unicodeJS.wordbreak.nextBreakOffset( sentence, position );
		assert.equal(
			position,
			expected[step + 2],
			'Next break is at position ' + expected[step + 2]
		);
	}

	// Backwards from where the forward pass stopped: compare against
	// expected[stepCount - 1] down to expected[0].
	for ( step = stepCount - 1; step >= 0; step-- ) {
		position = unicodeJS.wordbreak.prevBreakOffset( sentence, position );
		assert.equal(
			position,
			expected[step],
			'Previous break is at position ' + expected[step]
		);
	}
});

/**
 * Exercise nextBreakOffset/prevBreakOffset with `true` passed as the third
 * argument: first a full forward and backward pass over the sample, then a
 * set of single jumps from fixed starting offsets.
 */
QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {
	var n, spot,
		position = 0,
		sample = new unicodeJS.TextString(
			'   The quick  brown ..fox jumps... 3.14159 すどくスドク   '
		),
		forwardStops = [ 6, 12, 19, 25, 31, 42, 49, 52 ],
		backwardStops = [ 46, 35, 26, 22, 14, 7, 3, 0 ],
		// Single jumps: which function to call, where to start, where it
		// should land, and the assertion message.
		spotChecks = [
			{ method: 'nextBreakOffset', from: 9, to: 12,
				message: 'Jump to end of word when starting in middle of word' },
			{ method: 'nextBreakOffset', from: 3, to: 6,
				message: 'Jump to end of word when starting at start of word' },
			{ method: 'nextBreakOffset', from: 13, to: 19,
				message: 'Jump to end of word when starting in double whitespace' },
			{ method: 'prevBreakOffset', from: 17, to: 14,
				message: 'Jump to start of word when starting in middle of word' },
			{ method: 'prevBreakOffset', from: 6, to: 3,
				message: 'Jump to start of word when starting at end of word' },
			{ method: 'prevBreakOffset', from: 13, to: 7,
				message: 'Jump to start of word when starting in double whitespace' }
		];

	QUnit.expect( forwardStops.length + backwardStops.length + spotChecks.length );

	// Forward pass from offset 0.
	for ( n = 0; n < forwardStops.length; n++ ) {
		position = unicodeJS.wordbreak.nextBreakOffset( sample, position, true );
		assert.equal( position, forwardStops[n], 'Next break is at position ' + forwardStops[n] );
	}

	// Backward pass, continuing from where the forward pass ended.
	for ( n = 0; n < backwardStops.length; n++ ) {
		position = unicodeJS.wordbreak.prevBreakOffset( sample, position, true );
		assert.equal( position, backwardStops[n], 'Previous break is at position ' + backwardStops[n] );
	}

	// Independent single jumps from fixed offsets.
	for ( n = 0; n < spotChecks.length; n++ ) {
		spot = spotChecks[n];
		assert.equal(
			unicodeJS.wordbreak[spot.method]( sample, spot.from, true ),
			spot.to,
			spot.message
		);
	}
});
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to its standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`/*!`
			`* Wordbreak module tests`
			`*`
			`* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt`
			`* @license The MIT License (MIT); see LICENSE.txt`
			`*/`

			`QUnit.module( 'unicodeJS.wordbreak' );`

Implement next/prevBreakOffset and word skipping This provides the functionality for keyboard word skipping (i.e. pressing ctrl/alt + arrow key). Bug: 46794 Change-Id: Ib0861fa075df805410717a148b8a6e166d947849 2013-04-02 15:50:02 +00:00			`QUnit.test( 'isBreak', function ( assert ) {`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`var i, result, context,`
			`text =`
			`/*jshint quotmark:double */`
			`// 0 - 10`
			`"\u0300xyz'd a' " +`
			`// 10 - 20`
			`"'a a-b 1a\r" +`
			`// 20 - 30`
			`"\nカタカナ3,1.2" +`
			`// 30 - 40`
			`" a_b_3_ナ_ " +`
			`// 40 - 50`
Use grapheme clusters in unicodeJS.TextString unicodejs.js: * add splitClusters(text) and splitCharacters(text) methods unicodejs.textstring.js: * change internal representation from a char string to a list of grapheme clusters unicodejs.wordbreak.js: * change getGroup to work on the first character of a grapheme cluster ve.js: * Use new unicodejs.splitClusters function Bug: 48975 Change-Id: I202b98199d2780534d1e02519b72579ba796f08f 2013-05-30 15:39:22 +00:00			`"汉字/漢字 c\u0300\u0327k " +`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`// 50 - 60`
Use grapheme clusters in unicodeJS.TextString unicodejs.js: * add splitClusters(text) and splitCharacters(text) methods unicodejs.textstring.js: * change internal representation from a char string to a list of grapheme clusters unicodejs.wordbreak.js: * change getGroup to work on the first character of a grapheme cluster ve.js: * Use new unicodejs.splitClusters function Bug: 48975 Change-Id: I202b98199d2780534d1e02519b72579ba796f08f 2013-05-30 15:39:22 +00:00			`"\ud800\udf08" + // U+10308 OLD ITALIC LETTER THE`
			`"\ud800\udf08\u0302" + // U+10308 OLD ITALIC LETTER THE + combining circumflex`
			`"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA`
			`" pad " +`
			`"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA`
			`"\ud800\udf0a" + // U+1030A OLD ITALIC LETTER KA`
			`// 60 - 65: "a." tests end of para`
UnicodeJS treating start/end tags as letters Because /[a-z]/.test(null) treats null as the string 'null' and so returns true. Bug: 48487 Change-Id: I2408229cddd876ff8fe55a9eddf9bac87a61457e 2013-05-18 14:45:10 +00:00			`" c\u0300\u0327 a.",`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to its standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`/*jshint quotmark:single */`
Implement next/prevBreakOffset and word skipping This provides the functionality for keyboard word skipping (i.e. pressing ctrl/alt + arrow key). Bug: 46794 Change-Id: Ib0861fa075df805410717a148b8a6e166d947849 2013-04-02 15:50:02 +00:00			`textString = new unicodeJS.TextString( text ),`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`breaks = [`
			`0, 1, 6, 7, 8, 9, 10,`
			`11, 12, 13, 14, 15, 16, 17, 19,`
			`21, 25, 30,`
			`31, 39, 40,`
Use grapheme clusters in unicodeJS.TextString unicodejs.js: * add splitClusters(text) and splitCharacters(text) methods unicodejs.textstring.js: * change internal representation from a char string to a list of grapheme clusters unicodejs.wordbreak.js: * change getGroup to work on the first character of a grapheme cluster ve.js: * Use new unicodejs.splitClusters function Bug: 48975 Change-Id: I202b98199d2780534d1e02519b72579ba796f08f 2013-05-30 15:39:22 +00:00			`41, 42, 43, 44, 45, 46, 48, 49, 50,`
			`51, 52, // TODO: these should not be in the list`
			`53, 54, 57, 58,`
			`59, // TODO: this should not be in the list`
			`60,`
			`61, 62, 63, 64, 65`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`];`

Use grapheme clusters in unicodeJS.TextString unicodejs.js: * add splitClusters(text) and splitCharacters(text) methods unicodejs.textstring.js: * change internal representation from a char string to a list of grapheme clusters unicodejs.wordbreak.js: * change getGroup to work on the first character of a grapheme cluster ve.js: * Use new unicodejs.splitClusters function Bug: 48975 Change-Id: I202b98199d2780534d1e02519b72579ba796f08f 2013-05-30 15:39:22 +00:00			`QUnit.expect( textString.getLength() + 1 );`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00
Use grapheme clusters in unicodeJS.TextString unicodejs.js: * add splitClusters(text) and splitCharacters(text) methods unicodejs.textstring.js: * change internal representation from a char string to a list of grapheme clusters unicodejs.wordbreak.js: * change getGroup to work on the first character of a grapheme cluster ve.js: * Use new unicodejs.splitClusters function Bug: 48975 Change-Id: I202b98199d2780534d1e02519b72579ba796f08f 2013-05-30 15:39:22 +00:00			`for ( i = 0; i <= textString.getLength(); i++ ) {`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`result = ( breaks.indexOf( i ) !== -1 );`
			`context =`
Use grapheme clusters in unicodeJS.TextString unicodejs.js: * add splitClusters(text) and splitCharacters(text) methods unicodejs.textstring.js: * change internal representation from a char string to a list of grapheme clusters unicodejs.wordbreak.js: * change getGroup to work on the first character of a grapheme cluster ve.js: * Use new unicodejs.splitClusters function Bug: 48975 Change-Id: I202b98199d2780534d1e02519b72579ba796f08f 2013-05-30 15:39:22 +00:00			`textString.substring( Math.max( i - 4, 0 ), i ).getString() +`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`'│' +`
Use grapheme clusters in unicodeJS.TextString unicodejs.js: * add splitClusters(text) and splitCharacters(text) methods unicodejs.textstring.js: * change internal representation from a char string to a list of grapheme clusters unicodejs.wordbreak.js: * change getGroup to work on the first character of a grapheme cluster ve.js: * Use new unicodejs.splitClusters function Bug: 48975 Change-Id: I202b98199d2780534d1e02519b72579ba796f08f 2013-05-30 15:39:22 +00:00			`textString.substring( i, Math.min( i + 4, text.length ) ).getString()`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`;`
			`assert.equal(`
Implement next/prevBreakOffset and word skipping This provides the functionality for keyboard word skipping (i.e. pressing ctrl/alt + arrow key). Bug: 46794 Change-Id: Ib0861fa075df805410717a148b8a6e166d947849 2013-04-02 15:50:02 +00:00			`unicodeJS.wordbreak.isBreak( textString, i ),`
UnicodeJS library to implement Unicode standards Initially just with a Wordbreak module to implement Unicode standard on 'Default Word Boundaries'. Due to it's standaloneability this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25 2013-03-18 11:31:14 +00:00			`result,`
			`'Position ' + i + ' is ' + ( result ? '' : 'not ' ) + 'a break: ' + context`
			`);`
			`}`
Remove superfluous spaces in function invocations. Find-Exec: ack '\.[a-zA-Z]+\ \(' --js modules/ve modules/unicodejs Change-Id: Ib7d0a6514f3321f1d09fbf7cf52c2a9c2cecde88 2013-04-03 03:00:31 +00:00			`});`
Implement next/prevBreakOffset and word skipping This provides the functionality for keyboard word skipping (i.e. pressing ctrl/alt + arrow key). Bug: 46794 Change-Id: Ib0861fa075df805410717a148b8a6e166d947849 2013-04-02 15:50:02 +00:00
			`QUnit.test( 'nextBreakOffset/prevBreakOffset', function ( assert ) {`
			`var i, offset = 0,`
			`text = 'The quick brown fox',`
			`textString = new unicodeJS.TextString( text ),`
			`breaks = [ 0, 0, 3, 4, 9, 10, 15, 16, 19, 19 ];`

			`QUnit.expect( 2*(breaks.length - 2) );`

			`for ( i = 2; i < breaks.length; i++ ) {`
			`offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset );`
			`assert.equal( offset, breaks[i], 'Next break is at position ' + breaks[i] );`
			`}`
			`for ( i = breaks.length - 3; i >= 0; i-- ) {`
			`offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset );`
			`assert.equal( offset, breaks[i], 'Previous break is at position ' + breaks[i] );`
			`}`
			`});`

			`QUnit.test( 'nextBreakOffset/prevBreakOffset (ignore whitespace)', function ( assert ) {`
			`var i, offset = 0,`
			`text = '   The quick  brown ..fox jumps... 3.14159 すどくスドク   ',`
			`textString = new unicodeJS.TextString( text ),`
			`nextBreaks = [ 6, 12, 19, 25, 31, 42, 49, 52 ],`
			`prevBreaks = [ 46, 35, 26, 22, 14, 7, 3, 0 ];`

			`QUnit.expect( nextBreaks.length + prevBreaks.length + 6 );`

			`for ( i = 0; i < nextBreaks.length; i++ ) {`
			`offset = unicodeJS.wordbreak.nextBreakOffset( textString, offset, true );`
			`assert.equal( offset, nextBreaks[i], 'Next break is at position ' + nextBreaks[i] );`
			`}`
			`for ( i = 0; i < prevBreaks.length; i++ ) {`
			`offset = unicodeJS.wordbreak.prevBreakOffset( textString, offset, true );`
			`assert.equal( offset, prevBreaks[i], 'Previous break is at position ' + prevBreaks[i] );`
			`}`

			`assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 9, true ),`
			`12, 'Jump to end of word when starting in middle of word');`
			`assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 3, true ),`
			`6, 'Jump to end of word when starting at start of word');`
			`assert.equal( unicodeJS.wordbreak.nextBreakOffset( textString, 13, true ),`
			`19, 'Jump to end of word when starting in double whitespace');`
			`assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 17, true ),`
			`14, 'Jump to start of word when starting in middle of word');`
			`assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 6, true ),`
			`3, 'Jump to start of word when starting at end of word');`
			`assert.equal( unicodeJS.wordbreak.prevBreakOffset( textString, 13, true ),`
			`7, 'Jump to start of word when starting in double whitespace');`
			`});`