mediawiki-extensions-Visual.../modules/unicodejs/unicodejs.js
David Chan 1c78d0a38c Use grapheme clusters in unicodeJS.TextString
unicodejs.js:
* add splitClusters(text) and splitCharacters(text) methods

unicodejs.textstring.js:
* change internal representation from a char string to a list of grapheme
  clusters

unicodejs.wordbreak.js:
* change getGroup to work on the first character of a grapheme cluster

ve.js:
* Use new unicodejs.splitClusters function

Bug: 48975
Change-Id: I202b98199d2780534d1e02519b72579ba796f08f
2013-05-30 17:34:10 +01:00

44 lines
1 KiB
JavaScript

/*!
* UnicodeJS namespace.
*
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
( function () {
/**
 * Root object under which every UnicodeJS class, static method and static
 * property is attached.
 *
 * @class
 * @singleton
 */
var unicodeJS = {};
/**
 * Break a string up into grapheme clusters.
 *
 * This is a stop-gap approximation rather than a full implementation of the
 * grapheme boundary rules: the text is split at every position except
 * immediately before a UTF-16 low surrogate (U+DC00..U+DFFF) or a combining
 * accent in the range U+0300..U+036F.
 *
 * @param {string} text Text to split
 * @returns {string[]} Array of clusters
 */
unicodeJS.splitClusters = function ( text ) {
	// TODO: implement Grapheme boundary rules
	// Zero-width pattern: matches wherever the next code unit may start a new cluster
	var clusterBoundary = /(?![\uDC00-\uDFFF\u0300-\u036F])/g;

	return text.split( clusterBoundary );
};
/**
 * Split a string into Unicode characters, keeping surrogates paired.
 *
 * A well-formed surrogate pair (a high surrogate immediately followed by a low
 * surrogate) is returned as a single element. Every other UTF-16 code unit,
 * including an unpaired high or low surrogate from ill-formed input, is
 * returned as an element of its own, so stray surrogates are never glued onto
 * a neighbouring character. Joining the returned array reproduces the input.
 *
 * @param {string} text Text to split
 * @returns {string[]} Array of characters (empty array for an empty string)
 */
unicodeJS.splitCharacters = function ( text ) {
	// Try the surrogate-pair alternative first, then fall back to one code unit.
	// [\s\S] is used instead of . so that line terminators are matched too.
	// String#match returns null when nothing matches (empty input), hence || [].
	return text.match( /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\s\S]/g ) || [];
};
// Expose the namespace as the global window.unicodeJS
window.unicodeJS = unicodeJS;
}() );