mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-12-02 01:46:47 +00:00
1c78d0a38c
unicodejs.js:
* add splitClusters(text) and splitCharacters(text) methods

unicodejs.textstring.js:
* change internal representation from a char string to a list of grapheme clusters

unicodejs.wordbreak.js:
* change getGroup to work on the first character of a grapheme cluster

ve.js:
* Use new unicodejs.splitClusters function

Bug: 48975
Change-Id: I202b98199d2780534d1e02519b72579ba796f08f
44 lines
1 KiB
JavaScript
44 lines
1 KiB
JavaScript
/*!
 * UnicodeJS namespace.
 *
 * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */
|
|
|
|
( function () {
	var unicodeJS, root;

	/**
	 * Namespace for all UnicodeJS classes, static methods and static properties.
	 *
	 * @class
	 * @singleton
	 */
	unicodeJS = {};

	/**
	 * Split a string into grapheme clusters.
	 *
	 * Kludge: full grapheme boundary rules are not implemented. The string is
	 * split at every position that is NOT immediately followed by either a
	 * UTF-16 low surrogate (U+DC00-U+DFFF) or a combining diacritical mark
	 * (U+0300-U+036F), so surrogate pairs stay together and those combining
	 * accents stay attached to the preceding character.
	 *
	 * An empty string yields an empty array.
	 *
	 * @param {string} text Text to split
	 * @returns {string[]} Array of clusters
	 */
	unicodeJS.splitClusters = function ( text ) {
		// TODO: implement Grapheme boundary rules
		return text.split( /(?![\uDC00-\uDFFF\u0300-\u036F])/g );
	};

	/**
	 * Split a string into Unicode characters, keeping surrogates paired.
	 *
	 * The string is split at every position that is NOT immediately followed
	 * by a UTF-16 low surrogate (U+DC00-U+DFFF). Combining marks are returned
	 * as separate elements.
	 *
	 * An empty string yields an empty array.
	 *
	 * @param {string} text Text to split
	 * @returns {string[]} Array of characters
	 */
	unicodeJS.splitCharacters = function ( text ) {
		// TODO: think through handling of invalid UTF-16
		return text.split( /(?![\uDC00-\uDFFF])/g );
	};

	// Expose on the global object. In a browser window this is `window`, exactly
	// as before; the feature test lets the module also load where `window` is
	// not defined, instead of throwing a ReferenceError. The `typeof` guard
	// keeps engines without `globalThis` on the original `window` path.
	root = typeof globalThis !== 'undefined' ? globalThis : window;
	root.unicodeJS = unicodeJS;
}() );
|