mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-27 15:50:29 +00:00
UnicodeJS library to implement Unicode standards
Initially just with a Wordbreak module to implement the Unicode standard on 'Default Word Boundaries'. Because it can stand alone, this has been written as a separate library. Non-BMP characters are currently not supported. Bug: 44085 Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25
This commit is contained in:
parent
e1f4196046
commit
4988efd35e
|
@ -38,6 +38,7 @@
|
|||
"ve.dm.MetaItemFactory",
|
||||
"ve.dm.ModelRegistry",
|
||||
"ve.dm.Converter",
|
||||
"ve.dm.DataString",
|
||||
"ve.dm.DocumentSlice",
|
||||
"ve.dm.DocumentSynchronizer",
|
||||
"ve.dm.NodeFactory",
|
||||
|
@ -149,6 +150,17 @@
|
|||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "UnicodeJS",
|
||||
"groups": [
|
||||
{
|
||||
"name": "UnicodeJS",
|
||||
"classes": [
|
||||
"unicodeJS.TextString", "unicodeJS.wordbreak"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Upstream",
|
||||
"groups": [
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
"--output": "../docs",
|
||||
"--": [
|
||||
"./external.js",
|
||||
"../modules/unicodejs",
|
||||
"../modules/ve"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -3,5 +3,6 @@ docs/
|
|||
modules/jquery
|
||||
modules/qunit
|
||||
modules/rangy
|
||||
modules/unicodejs
|
||||
modules/parser
|
||||
tests/parser
|
||||
|
|
|
@ -67,7 +67,15 @@ $wgResourceModules += array(
|
|||
),
|
||||
'jquery.visibleText' => $wgVisualEditorResourceTemplate + array(
|
||||
'scripts' => array(
|
||||
'jquery/jquery.visibleText.js'
|
||||
'jquery/jquery.visibleText.js',
|
||||
),
|
||||
),
|
||||
'unicodejs.wordbreak' => $wgVisualEditorResourceTemplate + array(
|
||||
'scripts' => array(
|
||||
'unicodejs/unicodejs.js',
|
||||
'unicodejs/unicodejs.textstring.js',
|
||||
'unicodejs/unicodejs.wordbreak.groups.js',
|
||||
'unicodejs/unicodejs.wordbreak.js',
|
||||
),
|
||||
),
|
||||
// Alias for backwards compat, safe to remove after
|
||||
|
@ -230,6 +238,7 @@ $wgResourceModules += array(
|
|||
've/dm/ve.dm.Transaction.js',
|
||||
've/dm/ve.dm.Surface.js',
|
||||
've/dm/ve.dm.SurfaceFragment.js',
|
||||
've/dm/ve.dm.DataString.js',
|
||||
've/dm/ve.dm.Document.js',
|
||||
've/dm/ve.dm.DocumentSlice.js',
|
||||
've/dm/ve.dm.DocumentSynchronizer.js',
|
||||
|
@ -390,6 +399,7 @@ $wgResourceModules += array(
|
|||
'dependencies' => array(
|
||||
'jquery',
|
||||
'rangy',
|
||||
'unicodejs.wordbreak',
|
||||
'ext.visualEditor.base',
|
||||
'mediawiki.Title',
|
||||
'jquery.autoEllipsis',
|
||||
|
|
|
@ -68,6 +68,10 @@ $html = file_get_contents( $page );
|
|||
<script src="../../modules/jquery/jquery.js"></script>
|
||||
<script src="../../modules/rangy/rangy-core.js"></script>
|
||||
<script src="../../modules/rangy/rangy-position.js"></script>
|
||||
<script src="../../modules/unicodejs/unicodejs.js"></script>
|
||||
<script src="../../modules/unicodejs/unicodejs.textstring.js"></script>
|
||||
<script src="../../modules/unicodejs/unicodejs.wordbreak.groups.js"></script>
|
||||
<script src="../../modules/unicodejs/unicodejs.wordbreak.js"></script>
|
||||
<!-- ext.visualEditor.base -->
|
||||
<script src="../../modules/ve/ve.js"></script>
|
||||
<script src="../../modules/ve/ve.EventEmitter.js"></script>
|
||||
|
@ -125,6 +129,7 @@ $html = file_get_contents( $page );
|
|||
<script src="../../modules/ve/dm/ve.dm.Transaction.js"></script>
|
||||
<script src="../../modules/ve/dm/ve.dm.Surface.js"></script>
|
||||
<script src="../../modules/ve/dm/ve.dm.SurfaceFragment.js"></script>
|
||||
<script src="../../modules/ve/dm/ve.dm.DataString.js"></script>
|
||||
<script src="../../modules/ve/dm/ve.dm.Document.js"></script>
|
||||
<script src="../../modules/ve/dm/ve.dm.DocumentSlice.js"></script>
|
||||
<script src="../../modules/ve/dm/ve.dm.DocumentSynchronizer.js"></script>
|
||||
|
|
|
@ -42,6 +42,10 @@ class MakeStaticLoader extends Maintenance {
|
|||
'jquery/jquery.js',
|
||||
'rangy/rangy-core.js',
|
||||
'rangy/rangy-position.js',
|
||||
'unicodejs/unicodejs.js',
|
||||
'unicodejs/unicodejs.textstring.js',
|
||||
'unicodejs/unicodejs.wordbreak.groups.js',
|
||||
'unicodejs/unicodejs.wordbreak.js',
|
||||
),
|
||||
);
|
||||
|
||||
|
|
7
modules/unicodejs/AUTHORS.txt
Normal file
7
modules/unicodejs/AUTHORS.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
Principal Authors (major contributors, alphabetically)
|
||||
|
||||
Ed Sanders <esanders@wikimedia.org>
|
||||
|
||||
Patch Contributors (minor contributors, alphabetically)
|
||||
|
||||
David Chan <david@troi.org>
|
25
modules/unicodejs/LICENSE.txt
Normal file
25
modules/unicodejs/LICENSE.txt
Normal file
|
@ -0,0 +1,25 @@
|
|||
Copyright (c) 2013 UnicodeJS team and others under the terms
|
||||
of The MIT License (MIT), as follows:
|
||||
|
||||
This software consists of voluntary contributions made by many
|
||||
individuals (AUTHORS.txt) For exact contribution history, see the
|
||||
revision history and logs, available at https://gerrit.wikimedia.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
32
modules/unicodejs/index.php
Normal file
32
modules/unicodejs/index.php
Normal file
|
@ -0,0 +1,32 @@
|
|||
<!--
|
||||
/**
|
||||
* UnicodeJS tests
|
||||
*
|
||||
* @file
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
-->
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>UnicodeJS Tests</title>
|
||||
|
||||
<!-- Load test framework -->
|
||||
<link rel="stylesheet" href="../qunit/qunit.css">
|
||||
<script src="../qunit/qunit.js"></script>
|
||||
|
||||
<!-- Dependencies -->
|
||||
<script src="../jquery/jquery.js"></script>
|
||||
<script src="unicodejs.js"></script>
|
||||
<script src="unicodejs.textstring.js"></script>
|
||||
<script src="unicodejs.wordbreak.groups.js"></script>
|
||||
<script src="unicodejs.wordbreak.js"></script>
|
||||
|
||||
<script src="unicodejs.wordbreak.test.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<div id="qunit"></div>
|
||||
</body>
|
||||
</html>
|
45
modules/unicodejs/tools/unicodejs.wordbreak.groups.php
Normal file
45
modules/unicodejs/tools/unicodejs.wordbreak.groups.php
Normal file
|
@ -0,0 +1,45 @@
|
|||
<?php
|
||||
/**
|
||||
* Wordbreak character groups generator
|
||||
*
|
||||
* @file
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
echo 'Downloading break point ranges from unicode.org... ';
|
||||
$data = file_get_contents( 'http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakProperty.txt' );
|
||||
echo "done\n";
|
||||
|
||||
echo 'Generating regular expressions... ';
|
||||
$lines = explode( "\n", $data );
|
||||
|
||||
$groups = array();
|
||||
|
||||
for ( $i = 0, $len = count($lines); $i < $len; $i++ ) {
|
||||
$line = $lines[$i];
|
||||
if ( substr( $line, 0, 1 ) === '#' || $line === '' ) {
|
||||
continue;
|
||||
}
|
||||
$cols = preg_split( '/[;#]/', $line );
|
||||
// Ignoring non-BMP characters for the time being
|
||||
if ( preg_match( '/[a-f0-9]{5}/i', $cols[0] ) ) continue;
|
||||
$range = '\u'.str_replace( '..', '-\u', trim( $cols[0] ) );
|
||||
$group = trim( $cols[1] );
|
||||
if ( !isset( $groups[$group] ) ) {
|
||||
$groups[$group] = '';
|
||||
}
|
||||
$groups[$group] .= $range;
|
||||
}
|
||||
|
||||
echo "done\n";
|
||||
|
||||
echo 'Writing to unicodejs.wordbreak.groups.js... ';
|
||||
|
||||
$json = preg_replace( '/ /', "\t", json_encode( $groups, JSON_PRETTY_PRINT ) );
|
||||
file_put_contents(
|
||||
dirname( __DIR__ ) . '/unicodejs.wordbreak.groups.js',
|
||||
"/*jshint quotmark:double */\nunicodeJS.groups = " . $json . ";\n"
|
||||
);
|
||||
|
||||
echo "done\n";
|
8
modules/unicodejs/unicodejs.js
Normal file
8
modules/unicodejs/unicodejs.js
Normal file
|
@ -0,0 +1,8 @@
|
|||
/*!
|
||||
* UnicodeJS namespace.
|
||||
*
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
window.unicodeJS = {};
|
38
modules/unicodejs/unicodejs.textstring.js
Normal file
38
modules/unicodejs/unicodejs.textstring.js
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*!
|
||||
* UnicodeJS TextString class.
|
||||
*
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
/**
|
||||
* TextString
|
||||
*
|
||||
* This class provides a simple interface for fetching plain text
|
||||
* from a data source. The base class reads data from a string, but
|
||||
* an extended class could provide access to a more complex structure,
|
||||
* e.g. an array or an HTML document tree.
|
||||
*
|
||||
* @class unicodeJS.TextString
|
||||
* @constructor
|
||||
* @param {string} text Text
|
||||
*/
|
||||
unicodeJS.TextString = function UnicodeJSTextString( text ) {
|
||||
this.text = text;
|
||||
};
|
||||
|
||||
/* Methods */
|
||||
|
||||
/**
|
||||
* Read character at specified position
|
||||
*
|
||||
* @method
|
||||
* @param {number} position Position to read from
|
||||
* @returns {string|null} Character, or null if out of bounds
|
||||
*/
|
||||
unicodeJS.TextString.prototype.read = function ( position ) {
|
||||
if ( position < 0 || position >= this.text.length ) {
|
||||
return null;
|
||||
}
|
||||
return this.text.charAt( position );
|
||||
};
|
15
modules/unicodejs/unicodejs.wordbreak.groups.js
Normal file
15
modules/unicodejs/unicodejs.wordbreak.groups.js
Normal file
|
@ -0,0 +1,15 @@
|
|||
/*jshint quotmark:double */
|
||||
unicodeJS.groups = {
|
||||
"CR": "\\u000D",
|
||||
"LF": "\\u000A",
|
||||
"Newline": "\\u000B-\\u000C\\u0085\\u2028\\u2029",
|
||||
"Extend": "\\u0300-\\u036F\\u0483-\\u0487\\u0488-\\u0489\\u0591-\\u05BD\\u05BF\\u05C1-\\u05C2\\u05C4-\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7-\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07A6-\\u07B0\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0859-\\u085B\\u08E4-\\u08FE\\u0900-\\u0902\\u0903\\u093A\\u093B\\u093C\\u093E-\\u0940\\u0941-\\u0948\\u0949-\\u094C\\u094D\\u094E-\\u094F\\u0951-\\u0957\\u0962-\\u0963\\u0981\\u0982-\\u0983\\u09BC\\u09BE-\\u09C0\\u09C1-\\u09C4\\u09C7-\\u09C8\\u09CB-\\u09CC\\u09CD\\u09D7\\u09E2-\\u09E3\\u0A01-\\u0A02\\u0A03\\u0A3C\\u0A3E-\\u0A40\\u0A41-\\u0A42\\u0A47-\\u0A48\\u0A4B-\\u0A4D\\u0A51\\u0A70-\\u0A71\\u0A75\\u0A81-\\u0A82\\u0A83\\u0ABC\\u0ABE-\\u0AC0\\u0AC1-\\u0AC5\\u0AC7-\\u0AC8\\u0AC9\\u0ACB-\\u0ACC\\u0ACD\\u0AE2-\\u0AE3\\u0B01\\u0B02-\\u0B03\\u0B3C\\u0B3E\\u0B3F\\u0B40\\u0B41-\\u0B44\\u0B47-\\u0B48\\u0B4B-\\u0B4C\\u0B4D\\u0B56\\u0B57\\u0B62-\\u0B63\\u0B82\\u0BBE-\\u0BBF\\u0BC0\\u0BC1-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCC\\u0BCD\\u0BD7\\u0C01-\\u0C03\\u0C3E-\\u0C40\\u0C41-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55-\\u0C56\\u0C62-\\u0C63\\u0C82-\\u0C83\\u0CBC\\u0CBE\\u0CBF\\u0CC0-\\u0CC4\\u0CC6\\u0CC7-\\u0CC8\\u0CCA-\\u0CCB\\u0CCC-\\u0CCD\\u0CD5-\\u0CD6\\u0CE2-\\u0CE3\\u0D02-\\u0D03\\u0D3E-\\u0D40\\u0D41-\\u0D44\\u0D46-\\u0D48\\u0D4A-\\u0D4C\\u0D4D\\u0D57\\u0D62-\\u0D63\\u0D82-\\u0D83\\u0DCA\\u0DCF-\\u0DD1\\u0DD2-\\u0DD4\\u0DD6\\u0DD8-\\u0DDF\\u0DF2-\\u0DF3\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB-\\u0EBC\\u0EC8-\\u0ECD\\u0F18-\\u0F19\\u0F35\\u0F37\\u0F39\\u0F3E-\\u0F3F\\u0F71-\\u0F7E\\u0F7F\\u0F80-\\u0F84\\u0F86-\\u0F87\\u0F8D-\\u0F97\\u0F99-\\u0FBC\\u0FC6\\u102B-\\u102C\\u102D-\\u1030\\u1031\\u1032-\\u1037\\u1038\\u1039-\\u103A\\u103B-\\u103C\\u103D-\\u103E\\u1056-\\u1057\\u1058-\\u1059\\u105E-\\u1060\\u1062-\\u1064\\u1067-\\u106D\\u1071-\\u1074\\u1082\\u1083-\\u1084\\u1085-\\u1086\\u1087-\\u108C\\u108D\\u108F\\u109A-\\u10
9C\\u109D\\u135D-\\u135F\\u1712-\\u1714\\u1732-\\u1734\\u1752-\\u1753\\u1772-\\u1773\\u17B4-\\u17B5\\u17B6\\u17B7-\\u17BD\\u17BE-\\u17C5\\u17C6\\u17C7-\\u17C8\\u17C9-\\u17D3\\u17DD\\u180B-\\u180D\\u18A9\\u1920-\\u1922\\u1923-\\u1926\\u1927-\\u1928\\u1929-\\u192B\\u1930-\\u1931\\u1932\\u1933-\\u1938\\u1939-\\u193B\\u19B0-\\u19C0\\u19C8-\\u19C9\\u1A17-\\u1A18\\u1A19-\\u1A1B\\u1A55\\u1A56\\u1A57\\u1A58-\\u1A5E\\u1A60\\u1A61\\u1A62\\u1A63-\\u1A64\\u1A65-\\u1A6C\\u1A6D-\\u1A72\\u1A73-\\u1A7C\\u1A7F\\u1B00-\\u1B03\\u1B04\\u1B34\\u1B35\\u1B36-\\u1B3A\\u1B3B\\u1B3C\\u1B3D-\\u1B41\\u1B42\\u1B43-\\u1B44\\u1B6B-\\u1B73\\u1B80-\\u1B81\\u1B82\\u1BA1\\u1BA2-\\u1BA5\\u1BA6-\\u1BA7\\u1BA8-\\u1BA9\\u1BAA\\u1BAB\\u1BAC-\\u1BAD\\u1BE6\\u1BE7\\u1BE8-\\u1BE9\\u1BEA-\\u1BEC\\u1BED\\u1BEE\\u1BEF-\\u1BF1\\u1BF2-\\u1BF3\\u1C24-\\u1C2B\\u1C2C-\\u1C33\\u1C34-\\u1C35\\u1C36-\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE1\\u1CE2-\\u1CE8\\u1CED\\u1CF2-\\u1CF3\\u1CF4\\u1DC0-\\u1DE6\\u1DFC-\\u1DFF\\u200C-\\u200D\\u20D0-\\u20DC\\u20DD-\\u20E0\\u20E1\\u20E2-\\u20E4\\u20E5-\\u20F0\\u2CEF-\\u2CF1\\u2D7F\\u2DE0-\\u2DFF\\u302A-\\u302D\\u302E-\\u302F\\u3099-\\u309A\\uA66F\\uA670-\\uA672\\uA674-\\uA67D\\uA69F\\uA6F0-\\uA6F1\\uA802\\uA806\\uA80B\\uA823-\\uA824\\uA825-\\uA826\\uA827\\uA880-\\uA881\\uA8B4-\\uA8C3\\uA8C4\\uA8E0-\\uA8F1\\uA926-\\uA92D\\uA947-\\uA951\\uA952-\\uA953\\uA980-\\uA982\\uA983\\uA9B3\\uA9B4-\\uA9B5\\uA9B6-\\uA9B9\\uA9BA-\\uA9BB\\uA9BC\\uA9BD-\\uA9C0\\uAA29-\\uAA2E\\uAA2F-\\uAA30\\uAA31-\\uAA32\\uAA33-\\uAA34\\uAA35-\\uAA36\\uAA43\\uAA4C\\uAA4D\\uAA7B\\uAAB0\\uAAB2-\\uAAB4\\uAAB7-\\uAAB8\\uAABE-\\uAABF\\uAAC1\\uAAEB\\uAAEC-\\uAAED\\uAAEE-\\uAAEF\\uAAF5\\uAAF6\\uABE3-\\uABE4\\uABE5\\uABE6-\\uABE7\\uABE8\\uABE9-\\uABEA\\uABEC\\uABED\\uFB1E\\uFE00-\\uFE0F\\uFE20-\\uFE26\\uFF9E-\\uFF9F",
|
||||
"Format": "\\u00AD\\u0600-\\u0604\\u06DD\\u070F\\u200E-\\u200F\\u202A-\\u202E\\u2060-\\u2064\\u206A-\\u206F\\uFEFF\\uFFF9-\\uFFFB",
|
||||
"Katakana": "\\u3031-\\u3035\\u309B-\\u309C\\u30A0\\u30A1-\\u30FA\\u30FC-\\u30FE\\u30FF\\u31F0-\\u31FF\\u32D0-\\u32FE\\u3300-\\u3357\\uFF66-\\uFF6F\\uFF70\\uFF71-\\uFF9D",
|
||||
"ALetter": "\\u0041-\\u005A\\u0061-\\u007A\\u00AA\\u00B5\\u00BA\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u01BA\\u01BB\\u01BC-\\u01BF\\u01C0-\\u01C3\\u01C4-\\u0293\\u0294\\u0295-\\u02AF\\u02B0-\\u02C1\\u02C6-\\u02D1\\u02E0-\\u02E4\\u02EC\\u02EE\\u0370-\\u0373\\u0374\\u0376-\\u0377\\u037A\\u037B-\\u037D\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03F5\\u03F7-\\u0481\\u048A-\\u0527\\u0531-\\u0556\\u0559\\u0561-\\u0587\\u05D0-\\u05EA\\u05F0-\\u05F2\\u05F3\\u0620-\\u063F\\u0640\\u0641-\\u064A\\u066E-\\u066F\\u0671-\\u06D3\\u06D5\\u06E5-\\u06E6\\u06EE-\\u06EF\\u06FA-\\u06FC\\u06FF\\u0710\\u0712-\\u072F\\u074D-\\u07A5\\u07B1\\u07CA-\\u07EA\\u07F4-\\u07F5\\u07FA\\u0800-\\u0815\\u081A\\u0824\\u0828\\u0840-\\u0858\\u08A0\\u08A2-\\u08AC\\u0904-\\u0939\\u093D\\u0950\\u0958-\\u0961\\u0971\\u0972-\\u0977\\u0979-\\u097F\\u0985-\\u098C\\u098F-\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BD\\u09CE\\u09DC-\\u09DD\\u09DF-\\u09E1\\u09F0-\\u09F1\\u0A05-\\u0A0A\\u0A0F-\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32-\\u0A33\\u0A35-\\u0A36\\u0A38-\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2-\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AD0\\u0AE0-\\u0AE1\\u0B05-\\u0B0C\\u0B0F-\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32-\\u0B33\\u0B35-\\u0B39\\u0B3D\\u0B5C-\\u0B5D\\u0B5F-\\u0B61\\u0B71\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99-\\u0B9A\\u0B9C\\u0B9E-\\u0B9F\\u0BA3-\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BD0\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D\\u0C58-\\u0C59\\u0C60-\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBD\\u0CDE\\u0CE0-\\u0CE1\\u0CF1-\\u0CF2\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D\\u0D4E\\u0D60-\\u0D61\\u0D7A-\\u0D7F\\u0D85-\\u0D96\\u0D9A-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0F00\\u0F40-\\u0F47\\u0F49-\\u0F6C\\u0F88-\\u0F8C\\u10A0-\\u10C5\\u10C7\\u10CD\\u10D0-\\u10FA\\u10FC\\u10FD-\
\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u1380-\\u138F\\u13A0-\\u13F4\\u1401-\\u166C\\u166F-\\u167F\\u1681-\\u169A\\u16A0-\\u16EA\\u16EE-\\u16F0\\u1700-\\u170C\\u170E-\\u1711\\u1720-\\u1731\\u1740-\\u1751\\u1760-\\u176C\\u176E-\\u1770\\u1820-\\u1842\\u1843\\u1844-\\u1877\\u1880-\\u18A8\\u18AA\\u18B0-\\u18F5\\u1900-\\u191C\\u1A00-\\u1A16\\u1B05-\\u1B33\\u1B45-\\u1B4B\\u1B83-\\u1BA0\\u1BAE-\\u1BAF\\u1BBA-\\u1BE5\\u1C00-\\u1C23\\u1C4D-\\u1C4F\\u1C5A-\\u1C77\\u1C78-\\u1C7D\\u1CE9-\\u1CEC\\u1CEE-\\u1CF1\\u1CF5-\\u1CF6\\u1D00-\\u1D2B\\u1D2C-\\u1D6A\\u1D6B-\\u1D77\\u1D78\\u1D79-\\u1D9A\\u1D9B-\\u1DBF\\u1E00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2071\\u207F\\u2090-\\u209C\\u2102\\u2107\\u210A-\\u2113\\u2115\\u2119-\\u211D\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2134\\u2135-\\u2138\\u2139\\u213C-\\u213F\\u2145-\\u2149\\u214E\\u2160-\\u2182\\u2183-\\u2184\\u2185-\\u2188\\u24B6-\\u24E9\\u2C00-\\u2C2E\\u2C30-\\u2C5E\\u2C60-\\u2C7B\\u2C7C-\\u2C7D\\u2C7E-\\u2CE4\\u2CEB-\\u2CEE\\u2CF2-\\u2CF3\\u2D00-\\u2D25\\u2D27\\u2D2D\\u2D30-\\u2D67\\u2D6F\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u2E2F\\u3005\\u303B\\u303C\\u3105-\\u312D\\u3131-\\u318E\\u31A0-\\u31BA\\uA000-\\uA014\\uA015\\uA016-\\uA48C\\uA4D0-\\uA4F7\\uA4F8-\\uA4FD\\uA500-\\uA60B\\uA60C\\uA610-\\uA61F\\uA62A-\\uA62B\\uA640-\\uA66D\\uA66E\\uA67F\\uA680-\\uA697\\uA6A0-\\uA6E5\\uA6E6-\\uA6EF\\uA717-\\uA71F\\uA722-\\uA76F\\uA770\\uA771-\\uA787\\uA788\\uA78B-\\uA78E\\uA790-\\uA793\\uA7A0-\\uA7AA\\uA7F8-\\uA7F9\\uA7FA\\uA7FB-\\uA801\\uA803-\\uA805\\uA807-
\\uA80A\\uA80C-\\uA822\\uA840-\\uA873\\uA882-\\uA8B3\\uA8F2-\\uA8F7\\uA8FB\\uA90A-\\uA925\\uA930-\\uA946\\uA960-\\uA97C\\uA984-\\uA9B2\\uA9CF\\uAA00-\\uAA28\\uAA40-\\uAA42\\uAA44-\\uAA4B\\uAAE0-\\uAAEA\\uAAF2\\uAAF3-\\uAAF4\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uABC0-\\uABE2\\uAC00-\\uD7A3\\uD7B0-\\uD7C6\\uD7CB-\\uD7FB\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D\\uFB1F-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40-\\uFB41\\uFB43-\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB\\uFE70-\\uFE74\\uFE76-\\uFEFC\\uFF21-\\uFF3A\\uFF41-\\uFF5A\\uFFA0-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC",
|
||||
"MidLetter": "\\u003A\\u00B7\\u0387\\u05F4\\u2027\\uFE13\\uFE55\\uFF1A",
|
||||
"MidNum": "\\u002C\\u003B\\u037E\\u0589\\u060C-\\u060D\\u066C\\u07F8\\u2044\\uFE10\\uFE14\\uFE50\\uFE54\\uFF0C\\uFF1B",
|
||||
"MidNumLet": "\\u0027\\u002E\\u2018\\u2019\\u2024\\uFE52\\uFF07\\uFF0E",
|
||||
"Numeric": "\\u0030-\\u0039\\u0660-\\u0669\\u066B\\u06F0-\\u06F9\\u07C0-\\u07C9\\u0966-\\u096F\\u09E6-\\u09EF\\u0A66-\\u0A6F\\u0AE6-\\u0AEF\\u0B66-\\u0B6F\\u0BE6-\\u0BEF\\u0C66-\\u0C6F\\u0CE6-\\u0CEF\\u0D66-\\u0D6F\\u0E50-\\u0E59\\u0ED0-\\u0ED9\\u0F20-\\u0F29\\u1040-\\u1049\\u1090-\\u1099\\u17E0-\\u17E9\\u1810-\\u1819\\u1946-\\u194F\\u19D0-\\u19D9\\u1A80-\\u1A89\\u1A90-\\u1A99\\u1B50-\\u1B59\\u1BB0-\\u1BB9\\u1C40-\\u1C49\\u1C50-\\u1C59\\uA620-\\uA629\\uA8D0-\\uA8D9\\uA900-\\uA909\\uA9D0-\\uA9D9\\uAA50-\\uAA59\\uABF0-\\uABF9",
|
||||
"ExtendNumLet": "\\u005F\\u203F-\\u2040\\u2054\\uFE33-\\uFE34\\uFE4D-\\uFE4F\\uFF3F"
|
||||
};
|
160
modules/unicodejs/unicodejs.wordbreak.js
Normal file
160
modules/unicodejs/unicodejs.wordbreak.js
Normal file
|
@ -0,0 +1,160 @@
|
|||
/*!
|
||||
* Wordbreak module
|
||||
*
|
||||
* Implementation of Unicode's Default Word Boundaries
|
||||
* http://www.unicode.org/reports/tr29/#Default_Word_Boundaries
|
||||
*
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
( function () {
|
||||
var group,
|
||||
groups = unicodeJS.groups,
|
||||
/**
|
||||
* @class unicodeJS.wordbreak
|
||||
* @singleton
|
||||
*/
|
||||
wordbreak = unicodeJS.wordbreak = {},
|
||||
patterns = {};
|
||||
|
||||
// build regexes
|
||||
for ( group in groups ) {
|
||||
patterns[group] = new RegExp( '[' + groups[group] + ']' );
|
||||
}
|
||||
|
||||
function getGroup( chr ) {
|
||||
var group;
|
||||
for ( group in patterns ) {
|
||||
if ( patterns[group].test( chr ) ) {
|
||||
return group;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Evaluates if the specified position within some text is a word boundary.
|
||||
* @param {string} text Text
|
||||
* @param {number} pos Character position
|
||||
* @returns {boolean} Is the position a word boundary
|
||||
*/
|
||||
wordbreak.isBreakInText = function ( text, pos ) {
|
||||
return unicodeJS.wordbreak.isBreakInTextString( new unicodeJS.TextString( text ), pos );
|
||||
};
|
||||
|
||||
/**
|
||||
* Evaluates if the specified position within some text is a word boundary.
|
||||
* @param {unicodeJS.TextString} string Text string
|
||||
* @param {number} pos Character position
|
||||
* @returns {boolean} Is the position a word boundary
|
||||
*/
|
||||
wordbreak.isBreakInTextString = function ( string, pos ) {
|
||||
// Break at the start and end of text.
|
||||
// WB1: sot ÷
|
||||
// WB2: ÷ eot
|
||||
if ( string.read( pos - 1 ) === null || string.read( pos ) === null ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// get some context
|
||||
var lft = [], rgt = [], l = 0, r = 0;
|
||||
rgt.push( getGroup( string.read( pos + r ) ) );
|
||||
lft.push( getGroup( string.read( pos - l - 1 ) ) );
|
||||
|
||||
switch ( true ) {
|
||||
// Do not break within CRLF.
|
||||
// WB3: CR × LF
|
||||
case lft[0] === 'CR' && rgt[0] === 'LF':
|
||||
return false;
|
||||
|
||||
// Otherwise break before and after Newlines (including CR and LF)
|
||||
// WB3a: (Newline | CR | LF) ÷
|
||||
case lft[0] === 'Newline' || lft[0] === 'CR' || lft[0] === 'LF':
|
||||
// WB3b: ÷ (Newline | CR | LF)
|
||||
case rgt[0] === 'Newline' || rgt[0] === 'CR' || rgt[0] === 'LF':
|
||||
return true;
|
||||
}
|
||||
|
||||
// Ignore Format and Extend characters, except when they appear at the beginning of a region of text.
|
||||
// WB4: X (Extend | Format)* → X
|
||||
if ( rgt[0] === 'Extend' || rgt[0] === 'Format' ) {
|
||||
// The Extend|Format character is to the right, so it is attached
|
||||
// to a character to the left, don't split here
|
||||
return false;
|
||||
}
|
||||
// We've reached the end of an Extend|Format sequence, collapse it
|
||||
while ( lft[0] === 'Extend' || lft[0] === 'Format' ) {
|
||||
l++;
|
||||
if ( pos - l - 1 <= 0) {
|
||||
// start of document
|
||||
return true;
|
||||
}
|
||||
lft[lft.length - 1] = getGroup( string.read( pos - l - 1 ) );
|
||||
}
|
||||
|
||||
|
||||
// Do not break between most letters.
|
||||
// WB5: ALetter × ALetter
|
||||
if ( lft[0] === 'ALetter' && rgt[0] === 'ALetter' ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// some tests beyond this point require more context
|
||||
l++;
|
||||
r++;
|
||||
rgt.push( getGroup( string.read( pos + r ) ) );
|
||||
lft.push( getGroup( string.read( pos - l - 1 ) ) );
|
||||
|
||||
switch ( true ) {
|
||||
// Do not break letters across certain punctuation.
|
||||
// WB6: ALetter × (MidLetter | MidNumLet) ALetter
|
||||
case lft[0] === 'ALetter' && rgt[1] === 'ALetter' &&
|
||||
( rgt[0] === 'MidLetter' || rgt[0] === 'MidNumLet' ):
|
||||
// WB7: ALetter (MidLetter | MidNumLet) × ALetter
|
||||
case rgt[0] === 'ALetter' && lft[1] === 'ALetter' &&
|
||||
( lft[0] === 'MidLetter' || lft[0] === 'MidNumLet' ):
|
||||
return false;
|
||||
|
||||
// Do not break within sequences of digits, or digits adjacent to letters (“3a”, or “A3”).
|
||||
// WB8: Numeric × Numeric
|
||||
case lft[0] === 'Numeric' && rgt[0] === 'Numeric':
|
||||
// WB9: ALetter × Numeric
|
||||
case lft[0] === 'ALetter' && rgt[0] === 'Numeric':
|
||||
// WB10: Numeric × ALetter
|
||||
case lft[0] === 'Numeric' && rgt[0] === 'ALetter':
|
||||
return false;
|
||||
|
||||
// Do not break within sequences, such as “3.2” or “3,456.789”.
|
||||
// WB11: Numeric (MidNum | MidNumLet) × Numeric
|
||||
case rgt[0] === 'Numeric' && lft[1] === 'Numeric' &&
|
||||
( lft[0] === 'MidNum' || lft[0] === 'MidNumLet' ):
|
||||
// WB12: Numeric × (MidNum | MidNumLet) Numeric
|
||||
case lft[0] === 'Numeric' && rgt[1] === 'Numeric' &&
|
||||
( rgt[0] === 'MidNum' || rgt[0] === 'MidNumLet' ):
|
||||
return false;
|
||||
|
||||
// Do not break between Katakana.
|
||||
// WB13: Katakana × Katakana
|
||||
case lft[0] === 'Katakana' && rgt[0] === 'Katakana':
|
||||
return false;
|
||||
|
||||
// Do not break from extenders.
|
||||
// WB13a: (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||
case rgt[0] === 'ExtendNumLet' &&
|
||||
( lft[0] === 'ALetter' || lft[0] === 'Numeric' || lft[0] === 'Katakana' || lft[0] === 'ExtendNumLet' ):
|
||||
// WB13b: ExtendNumLet × (ALetter | Numeric | Katakana)
|
||||
case lft[0] === 'ExtendNumLet' &&
|
||||
( rgt[0] === 'ALetter' || rgt[0] === 'Numeric' || rgt[0] === 'Katakana' ):
|
||||
return false;
|
||||
|
||||
// Do not break between regional indicator symbols.
|
||||
// WB13c: Regional_Indicator × Regional_Indicator
|
||||
case lft[0] === 'Regional_Indicator' && rgt[0] === 'Regional_Indicator':
|
||||
return false;
|
||||
}
|
||||
// Otherwise, break everywhere (including around ideographs).
|
||||
// WB14: Any ÷ Any
|
||||
return true;
|
||||
};
|
||||
}() );
|
51
modules/unicodejs/unicodejs.wordbreak.test.js
Normal file
51
modules/unicodejs/unicodejs.wordbreak.test.js
Normal file
|
@ -0,0 +1,51 @@
|
|||
/*!
|
||||
* Wordbreak module tests
|
||||
*
|
||||
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
QUnit.module( 'unicodeJS.wordbreak' );
|
||||
|
||||
QUnit.test( 'isBreakInText', function ( assert ) {
|
||||
var i, result, context,
|
||||
text =
|
||||
/*jshint quotmark:double */
|
||||
// 0 - 10
|
||||
"\u0300xyz'd a' " +
|
||||
// 10 - 20
|
||||
"'a a-b 1a\r" +
|
||||
// 20 - 30
|
||||
"\nカタカナ3,1.2" +
|
||||
// 30 - 40
|
||||
" a_b_3_ナ_ " +
|
||||
// 40 - 50
|
||||
"汉字/漢字 c\u0300\u0327k" +
|
||||
// 50 - 60
|
||||
" c\u0300\u0327",
|
||||
/*jshint quotmark:single */
|
||||
breaks = [
|
||||
0, 1, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15, 16, 17, 19,
|
||||
21, 25, 30,
|
||||
31, 39, 40,
|
||||
41, 42, 43, 44, 45, 46, 50,
|
||||
51, 54
|
||||
];
|
||||
|
||||
QUnit.expect( text.length + 1 );
|
||||
|
||||
for ( i = 0; i <= text.length; i++ ) {
|
||||
result = ( breaks.indexOf( i ) !== -1 );
|
||||
context =
|
||||
text.substring( Math.max( i - 4, 0 ), i ) +
|
||||
'│' +
|
||||
text.substring( i, Math.min ( i + 4, text.length ) )
|
||||
;
|
||||
assert.equal(
|
||||
unicodeJS.wordbreak.isBreakInText( text, i ),
|
||||
result,
|
||||
'Position ' + i + ' is ' + ( result ? '' : 'not ' ) + 'a break: ' + context
|
||||
);
|
||||
}
|
||||
});
|
36
modules/ve/dm/ve.dm.DataString.js
Normal file
36
modules/ve/dm/ve.dm.DataString.js
Normal file
|
@ -0,0 +1,36 @@
|
|||
/*!
|
||||
* VisualEditor DataString class.
|
||||
*
|
||||
* @copyright 2011-2013 VisualEditor Team and others; see AUTHORS.txt
|
||||
* @license The MIT License (MIT); see LICENSE.txt
|
||||
*/
|
||||
|
||||
/**
|
||||
* Wrapper class to read document data as a plain text string.
|
||||
* @class
|
||||
* @extends unicodeJS.TextString
|
||||
* @constructor
|
||||
* @param {Array} data Document data
|
||||
*/
|
||||
ve.dm.DataString = function VeDmDataString( data ) {
|
||||
this.data = data;
|
||||
};
|
||||
|
||||
/* Inheritance */
|
||||
|
||||
ve.inheritClass( ve.dm.DataString, unicodeJS.TextString );
|
||||
|
||||
/**
|
||||
* Reads the character from the specified position in the data.
|
||||
* @param {number} position Position in data to read from
|
||||
* @returns {string|null} Character at position, or null if out of bounds or not text
|
||||
*/
|
||||
ve.dm.DataString.prototype.read = function( position ) {
|
||||
var dataAt = this.data[position];
|
||||
// check data is present at position and is not an element
|
||||
if ( dataAt !== undefined && dataAt.type === undefined ) {
|
||||
return typeof dataAt === 'string' ? dataAt : dataAt[0];
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
};
|
|
@ -1033,43 +1033,57 @@ ve.dm.Document.prototype.getNearestStructuralOffset = function ( offset, directi
|
|||
};
|
||||
|
||||
/**
|
||||
* Get the nearest word boundary.
|
||||
* Get the nearest word boundaries as a range.
|
||||
*
|
||||
* The offset will first be moved to the nearest content offset if it's not at one already. If a
|
||||
* direction was given, the boundary will be found in that direction, otherwise both directions will
|
||||
* be calculated and the one with the lowest distance from offset will be returned. Elements are
|
||||
* always word boundaries. For more information about what is considered to be a word character,
|
||||
* see {ve.dm.SurfaceFragment.wordPattern}.
|
||||
* The offset will first be moved to the nearest content offset if it's not at one already.
|
||||
* Elements are always word boundaries.
|
||||
*
|
||||
* @method
|
||||
* @param {number} offset Offset to start from
|
||||
* @param {number} [direction] Direction to prefer matching offset in, -1 for left and 1 for right
|
||||
* @returns {number} Nearest word boundary
|
||||
* @returns {ve.Range} Range around nearest word boundaries
|
||||
*/
|
||||
ve.dm.Document.prototype.getNearestWordBoundary = function ( offset, direction ) {
|
||||
var left, right, i, inc,
|
||||
pattern = ve.dm.SurfaceFragment.static.wordBoundaryPattern,
|
||||
data = this.data;
|
||||
ve.dm.Document.prototype.getNearestWordRange = function ( offset ) {
|
||||
var offsetLeft, offsetRight, i,
|
||||
dataString = new ve.dm.DataString( this.data );
|
||||
|
||||
offset = this.getNearestContentOffset( offset );
|
||||
if ( !direction ) {
|
||||
left = this.getNearestWordBoundary( offset, -1 );
|
||||
right = this.getNearestWordBoundary( offset, +1 );
|
||||
return offset - left < right - offset ? left : right;
|
||||
} else {
|
||||
inc = direction > 0 ? 1 : -1;
|
||||
i = offset + ( inc > 0 ? 0 : -1 );
|
||||
do {
|
||||
if ( data[i].type === undefined ) {
|
||||
// Plain text extraction
|
||||
if ( pattern.test( typeof data[i] === 'string' ? data[i] : data[i][0] ) ) {
|
||||
break;
|
||||
}
|
||||
|
||||
// If the cursor offset is a break (i.e. the start/end of word) we should
|
||||
// check one position either side to see if there is a non-break
|
||||
// and if so, move the offset accordingly
|
||||
if( unicodeJS.wordbreak.isBreakInTextString( dataString, offset ) ) {
|
||||
if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset + 1 ) ) {
|
||||
offset++;
|
||||
} else if( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset - 1 ) ) {
|
||||
offset--;
|
||||
} else {
|
||||
// just return one character to the right, unless we are at the end
|
||||
// of the text, in which case return the character to the left
|
||||
if( dataString.read( offset ) !== null ) {
|
||||
return new ve.Range( offset, offset + 1 );
|
||||
} else {
|
||||
break;
|
||||
return new ve.Range( offset - 1, offset );
|
||||
}
|
||||
} while ( data[i += inc] );
|
||||
return i + ( inc > 0 ? 0 : 1 );
|
||||
}
|
||||
}
|
||||
|
||||
i = offset;
|
||||
// Search left and right for next break points
|
||||
while( dataString.read( i++ ) !== null ) {
|
||||
offsetRight = i;
|
||||
if( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
i = offset;
|
||||
while( dataString.read( i-- ) !== null ) {
|
||||
offsetLeft = i;
|
||||
if( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return new ve.Range( offsetLeft, offsetRight );
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -216,7 +216,7 @@ ve.dm.SurfaceFragment.prototype.trimRange = function () {
|
|||
*
|
||||
* @method
|
||||
* @param {string} [scope='parent'] Method of expansion:
|
||||
* - `word`: Expands to cover the nearest word by looking for word boundary characters
|
||||
* - `word`: Expands to cover the nearest word by looking for word breaks (see unicodeJS.wordbreak)
|
||||
* - `annotation`: Expands to cover a given annotation (argument) within the current range
|
||||
* - `root`: Expands to cover the entire document
|
||||
* - `siblings`: Expands to cover all sibling nodes
|
||||
|
@ -233,10 +233,18 @@ ve.dm.SurfaceFragment.prototype.expandRange = function ( scope, type ) {
|
|||
var range, node, nodes, parent;
|
||||
switch ( scope || 'parent' ) {
|
||||
case 'word':
|
||||
range = new ve.Range(
|
||||
this.document.getNearestWordBoundary( this.range.start, -1 ),
|
||||
this.document.getNearestWordBoundary( this.range.end, 1 )
|
||||
);
|
||||
if( this.range.getLength() > 0 ) {
|
||||
range = ve.Range.newCoveringRange( [
|
||||
this.document.getNearestWordRange( this.range.start ),
|
||||
this.document.getNearestWordRange( this.range.end )
|
||||
] );
|
||||
if( this.range.isBackwards() ) {
|
||||
range = range.flip();
|
||||
}
|
||||
} else {
|
||||
// optimisation for zero-length ranges
|
||||
range = this.document.getNearestWordRange( this.range.start );
|
||||
}
|
||||
break;
|
||||
case 'annotation':
|
||||
range = this.document.getAnnotatedRangeFromSelection( this.range, type );
|
||||
|
|
|
@ -1332,8 +1332,8 @@ QUnit.test( 'getNearestStructuralOffset', function ( assert ) {
|
|||
}
|
||||
} );
|
||||
|
||||
QUnit.test( 'getNearestWordBoundary', function ( assert ) {
|
||||
var i, doc, left, right, word,
|
||||
QUnit.test( 'getNearestWordRange', function ( assert ) {
|
||||
var i, doc, range, word,
|
||||
cases = [
|
||||
{
|
||||
'phrase': 'visual editor test',
|
||||
|
@ -1341,23 +1341,59 @@ QUnit.test( 'getNearestWordBoundary', function ( assert ) {
|
|||
'offset': 10,
|
||||
'expected': 'editor'
|
||||
},
|
||||
{
|
||||
'phrase': 'visual editor test',
|
||||
'msg': 'cursor at start of word',
|
||||
'offset': 7,
|
||||
'expected': 'editor'
|
||||
},
|
||||
{
|
||||
'phrase': 'visual editor test',
|
||||
'msg': 'cursor at end of word',
|
||||
'offset': 13,
|
||||
'expected': 'editor'
|
||||
},
|
||||
{
|
||||
'phrase': 'visual editor test',
|
||||
'msg': 'cursor at start of text',
|
||||
'offset': 0,
|
||||
'expected': 'visual'
|
||||
},
|
||||
{
|
||||
'phrase': 'visual editor test',
|
||||
'msg': 'cursor at end of text',
|
||||
'offset': 18,
|
||||
'expected': 'test'
|
||||
},
|
||||
{
|
||||
'phrase': 'Computer-aided design',
|
||||
'msg': 'hyphenated Latin word',
|
||||
'offset': 2,
|
||||
'expected': 'Computer-aided'
|
||||
'offset': 12,
|
||||
'expected': 'aided'
|
||||
},
|
||||
{
|
||||
'phrase': 'Water (l\'eau) is',
|
||||
'msg': 'apostrophe and parentheses (Latin)',
|
||||
'offset': 8,
|
||||
'expected': '(l\'eau)'
|
||||
'expected': 'l\'eau'
|
||||
},
|
||||
{
|
||||
'phrase': 'Water (H2O) is',
|
||||
'msg': 'number in word (Latin)',
|
||||
'offset': 9,
|
||||
'expected': '(H2O)'
|
||||
'expected': 'H2O'
|
||||
},
|
||||
{
|
||||
'phrase': 'The \'word\' is',
|
||||
'msg': 'apostrophes as single quotes',
|
||||
'offset': 7,
|
||||
'expected': 'word'
|
||||
},
|
||||
{
|
||||
'phrase': 'Some "double" quotes',
|
||||
'msg': 'double quotes',
|
||||
'offset': 8,
|
||||
'expected': 'double'
|
||||
},
|
||||
{
|
||||
'phrase': 'Wikipédia l\'encyclopédie libre',
|
||||
|
@ -1365,6 +1401,12 @@ QUnit.test( 'getNearestWordBoundary', function ( assert ) {
|
|||
'offset': 15,
|
||||
'expected': 'l\'encyclopédie'
|
||||
},
|
||||
{
|
||||
'phrase': 'Wikipédia l\'encyclopédie libre',
|
||||
'msg': 'Extend characters (i.e. letter + accent)',
|
||||
'offset': 15,
|
||||
'expected': 'l\'encyclopédie'
|
||||
},
|
||||
{
|
||||
'phrase': 'Википедия свободная энциклопедия',
|
||||
'msg': 'Cyrillic word',
|
||||
|
@ -1388,15 +1430,49 @@ QUnit.test( 'getNearestWordBoundary', function ( assert ) {
|
|||
'msg': 'Eastern Arabic numerals',
|
||||
'offset': 13,
|
||||
'expected': '٠١٢٣٤٥٦٧٨٩'
|
||||
},
|
||||
{
|
||||
'phrase': 'Latinカタカナwrapped',
|
||||
'msg': 'Latin-wrapped Katakana word',
|
||||
'offset': 7,
|
||||
'expected': 'カタカナ'
|
||||
},
|
||||
{
|
||||
'phrase': '维基百科',
|
||||
'msg': 'Hanzi characters (cursor in middle)',
|
||||
'offset': 2,
|
||||
'expected': '百'
|
||||
},
|
||||
{
|
||||
'phrase': '维基百科',
|
||||
'msg': 'Hanzi characters (cursor at end)',
|
||||
'offset': 4,
|
||||
'expected': '科'
|
||||
},
|
||||
{
|
||||
'phrase': 'Costs £1,234.00 each',
|
||||
'msg': 'formatted number sequence',
|
||||
'offset': 11,
|
||||
'expected': '1,234.00'
|
||||
},
|
||||
{
|
||||
'phrase': 'Reset index_of variable',
|
||||
'msg': 'underscore-joined word',
|
||||
'offset': 8,
|
||||
'expected': 'index_of'
|
||||
}
|
||||
];
|
||||
QUnit.expect( cases.length );
|
||||
for ( i = 0; i < cases.length; i++ ) {
|
||||
doc = new ve.dm.Document( cases[i].phrase.split('') );
|
||||
left = doc.getNearestWordBoundary( cases[i].offset, -1 );
|
||||
right = doc.getNearestWordBoundary( cases[i].offset, 1 );
|
||||
word = cases[i].phrase.substring( left, right );
|
||||
assert.strictEqual( word, cases[i].expected, cases[i].msg );
|
||||
range = doc.getNearestWordRange( cases[i].offset );
|
||||
word = cases[i].phrase.substring( range.start, range.end );
|
||||
assert.strictEqual( word, cases[i].expected,
|
||||
cases[i].msg + ': ' +
|
||||
cases[i].phrase.substring( 0, cases[i].offset ) + '│' +
|
||||
cases[i].phrase.substring( cases[i].offset, cases[i].phrase.length ) +
|
||||
' → ' + cases[i].expected
|
||||
);
|
||||
}
|
||||
} );
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ QUnit.test( 'collapseRange', 3, function ( assert ) {
|
|||
assert.deepEqual( collapsedFragment.getRange(), new ve.Range( 20, 20 ), 'new range is used' );
|
||||
} );
|
||||
|
||||
QUnit.test( 'expandRange', 1, function ( assert ) {
|
||||
QUnit.test( 'expandRange (closest)', 1, function ( assert ) {
|
||||
var doc = new ve.dm.Document( ve.copyArray( ve.dm.example.data ) ),
|
||||
surface = new ve.dm.Surface( doc ),
|
||||
fragment = new ve.dm.SurfaceFragment( surface, new ve.Range( 20, 21 ) );
|
||||
|
@ -70,6 +70,40 @@ QUnit.test( 'expandRange', 1, function ( assert ) {
|
|||
);
|
||||
} );
|
||||
|
||||
QUnit.test( 'expandRange (word)', 1, function ( assert ) {
|
||||
var i, doc, surface, fragment, newFragment, range, word, cases = [
|
||||
{
|
||||
phrase: 'the quick brown fox',
|
||||
range: new ve.Range( 6, 13 ),
|
||||
expected: 'quick brown',
|
||||
msg: 'range starting and ending in latin words'
|
||||
},
|
||||
{
|
||||
phrase: 'the quick brown fox',
|
||||
range: new ve.Range( 18, 12 ),
|
||||
expected: 'brown fox',
|
||||
msg: 'backwards range starting and ending in latin words'
|
||||
},
|
||||
{
|
||||
phrase: 'the quick brown fox',
|
||||
range: new ve.Range( 7, 7 ),
|
||||
expected: 'quick',
|
||||
msg: 'zero-length range'
|
||||
}
|
||||
];
|
||||
QUnit.expect( cases.length*2 );
|
||||
for ( i = 0; i < cases.length; i++ ) {
|
||||
doc = new ve.dm.Document( cases[i].phrase.split('') );
|
||||
surface = new ve.dm.Surface( doc );
|
||||
fragment = new ve.dm.SurfaceFragment( surface, cases[i].range );
|
||||
newFragment = fragment.expandRange( 'word' );
|
||||
range = newFragment.getRange();
|
||||
word = cases[i].phrase.substring( range.start, range.end );
|
||||
assert.strictEqual( word, cases[i].expected, cases[i].msg + ': text' );
|
||||
assert.strictEqual( cases[i].range.isBackwards(), range.isBackwards(), cases[i].msg + ': range direction' );
|
||||
}
|
||||
} );
|
||||
|
||||
QUnit.test( 'removeContent', 2, function ( assert ) {
|
||||
var doc = new ve.dm.Document( ve.copyArray( ve.dm.example.data ) ),
|
||||
surface = new ve.dm.Surface( doc ),
|
||||
|
|
|
@ -13,6 +13,10 @@
|
|||
<script src="../../jquery/jquery.js"></script>
|
||||
<script src="../../rangy/rangy-core.js"></script>
|
||||
<script src="../../rangy/rangy-position.js"></script>
|
||||
<script src="../../unicodejs/unicodejs.js"></script>
|
||||
<script src="../../unicodejs/unicodejs.textstring.js"></script>
|
||||
<script src="../../unicodejs/unicodejs.wordbreak.groups.js"></script>
|
||||
<script src="../../unicodejs/unicodejs.wordbreak.js"></script>
|
||||
<!-- ext.visualEditor.base -->
|
||||
<script src="../../ve/ve.js"></script>
|
||||
<script src="../../ve/ve.EventEmitter.js"></script>
|
||||
|
@ -70,6 +74,7 @@
|
|||
<script src="../../ve/dm/ve.dm.Transaction.js"></script>
|
||||
<script src="../../ve/dm/ve.dm.Surface.js"></script>
|
||||
<script src="../../ve/dm/ve.dm.SurfaceFragment.js"></script>
|
||||
<script src="../../ve/dm/ve.dm.DataString.js"></script>
|
||||
<script src="../../ve/dm/ve.dm.Document.js"></script>
|
||||
<script src="../../ve/dm/ve.dm.DocumentSlice.js"></script>
|
||||
<script src="../../ve/dm/ve.dm.DocumentSynchronizer.js"></script>
|
||||
|
|
Loading…
Reference in a new issue