UnicodeJS library to implement Unicode standards

Initially this contains just a Wordbreak module, which implements the
Unicode standard on 'Default Word Boundaries'. Because it can stand
alone, it has been written as a separate library. Non-BMP characters
are currently not supported.

Bug: 44085
Change-Id: Ieafa070076f4c36855684f6bc179667e28af2c25
This commit is contained in:
Ed Sanders 2013-03-18 11:31:14 +00:00
parent e1f4196046
commit 4988efd35e
22 changed files with 633 additions and 45 deletions

View file

@ -38,6 +38,7 @@
"ve.dm.MetaItemFactory",
"ve.dm.ModelRegistry",
"ve.dm.Converter",
"ve.dm.DataString",
"ve.dm.DocumentSlice",
"ve.dm.DocumentSynchronizer",
"ve.dm.NodeFactory",
@ -149,6 +150,17 @@
}
]
},
{
"name": "UnicodeJS",
"groups": [
{
"name": "UnicodeJS",
"classes": [
"unicodeJS.TextString", "unicodeJS.wordbreak"
]
}
]
},
{
"name": "Upstream",
"groups": [

View file

@ -9,6 +9,7 @@
"--output": "../docs",
"--": [
"./external.js",
"../modules/unicodejs",
"../modules/ve"
]
}

View file

@ -3,5 +3,6 @@ docs/
modules/jquery
modules/qunit
modules/rangy
modules/unicodejs
modules/parser
tests/parser

View file

@ -1,6 +1,7 @@
{
"predef": [
"ve",
"unicodeJS",
"QUnit"
],

View file

@ -67,7 +67,15 @@ $wgResourceModules += array(
),
'jquery.visibleText' => $wgVisualEditorResourceTemplate + array(
'scripts' => array(
'jquery/jquery.visibleText.js'
'jquery/jquery.visibleText.js',
),
),
'unicodejs.wordbreak' => $wgVisualEditorResourceTemplate + array(
'scripts' => array(
'unicodejs/unicodejs.js',
'unicodejs/unicodejs.textstring.js',
'unicodejs/unicodejs.wordbreak.groups.js',
'unicodejs/unicodejs.wordbreak.js',
),
),
// Alias for backwards compat, safe to remove after
@ -230,6 +238,7 @@ $wgResourceModules += array(
've/dm/ve.dm.Transaction.js',
've/dm/ve.dm.Surface.js',
've/dm/ve.dm.SurfaceFragment.js',
've/dm/ve.dm.DataString.js',
've/dm/ve.dm.Document.js',
've/dm/ve.dm.DocumentSlice.js',
've/dm/ve.dm.DocumentSynchronizer.js',
@ -390,6 +399,7 @@ $wgResourceModules += array(
'dependencies' => array(
'jquery',
'rangy',
'unicodejs.wordbreak',
'ext.visualEditor.base',
'mediawiki.Title',
'jquery.autoEllipsis',

View file

@ -68,6 +68,10 @@ $html = file_get_contents( $page );
<script src="../../modules/jquery/jquery.js"></script>
<script src="../../modules/rangy/rangy-core.js"></script>
<script src="../../modules/rangy/rangy-position.js"></script>
<script src="../../modules/unicodejs/unicodejs.js"></script>
<script src="../../modules/unicodejs/unicodejs.textstring.js"></script>
<script src="../../modules/unicodejs/unicodejs.wordbreak.groups.js"></script>
<script src="../../modules/unicodejs/unicodejs.wordbreak.js"></script>
<!-- ext.visualEditor.base -->
<script src="../../modules/ve/ve.js"></script>
<script src="../../modules/ve/ve.EventEmitter.js"></script>
@ -125,6 +129,7 @@ $html = file_get_contents( $page );
<script src="../../modules/ve/dm/ve.dm.Transaction.js"></script>
<script src="../../modules/ve/dm/ve.dm.Surface.js"></script>
<script src="../../modules/ve/dm/ve.dm.SurfaceFragment.js"></script>
<script src="../../modules/ve/dm/ve.dm.DataString.js"></script>
<script src="../../modules/ve/dm/ve.dm.Document.js"></script>
<script src="../../modules/ve/dm/ve.dm.DocumentSlice.js"></script>
<script src="../../modules/ve/dm/ve.dm.DocumentSynchronizer.js"></script>

View file

@ -42,6 +42,10 @@ class MakeStaticLoader extends Maintenance {
'jquery/jquery.js',
'rangy/rangy-core.js',
'rangy/rangy-position.js',
'unicodejs/unicodejs.js',
'unicodejs/unicodejs.textstring.js',
'unicodejs/unicodejs.wordbreak.groups.js',
'unicodejs/unicodejs.wordbreak.js',
),
);

View file

@ -0,0 +1,7 @@
Principal Authors (major contributors, alphabetically)
Ed Sanders <esanders@wikimedia.org>
Patch Contributors (minor contributors, alphabetically)
David Chan <david@troi.org>

View file

@ -0,0 +1,25 @@
Copyright (c) 2013 UnicodeJS team and others under the terms
of The MIT License (MIT), as follows:
This software consists of voluntary contributions made by many
individuals (AUTHORS.txt) For exact contribution history, see the
revision history and logs, available at https://gerrit.wikimedia.org
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -0,0 +1,32 @@
<!--
/**
* UnicodeJS tests
*
* @file
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
-->
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>UnicodeJS Tests</title>
<!-- Load test framework -->
<link rel="stylesheet" href="../qunit/qunit.css">
<script src="../qunit/qunit.js"></script>
<!-- Dependencies -->
<script src="../jquery/jquery.js"></script>
<script src="unicodejs.js"></script>
<script src="unicodejs.textstring.js"></script>
<script src="unicodejs.wordbreak.groups.js"></script>
<script src="unicodejs.wordbreak.js"></script>
<script src="unicodejs.wordbreak.test.js"></script>
</head>
<body>
<div id="qunit"></div>
</body>
</html>

View file

@ -0,0 +1,45 @@
<?php
/**
 * Wordbreak character groups generator
 *
 * Downloads the Unicode WordBreakProperty table and regenerates
 * unicodejs.wordbreak.groups.js (in the parent of this script's directory)
 * with one regular expression character-class body per word break property.
 * Code points outside the BMP are skipped.
 *
 * @file
 * @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
 * @license The MIT License (MIT); see LICENSE.txt
 */

$source = 'http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakProperty.txt';
$target = dirname( __DIR__ ) . '/unicodejs.wordbreak.groups.js';

echo 'Downloading break point ranges from unicode.org... ';
$data = file_get_contents( $source );
if ( $data === false || $data === '' ) {
	// Abort rather than overwrite the existing groups file with an empty object
	echo "failed\n";
	exit( 1 );
}
echo "done\n";

echo 'Generating regular expressions... ';
$groups = array();
foreach ( explode( "\n", $data ) as $line ) {
	// Skip comment lines and blank lines
	if ( substr( $line, 0, 1 ) === '#' || $line === '' ) {
		continue;
	}

	// Data lines look like "<code point or range> ; <property> # <comment>"
	$cols = preg_split( '/[;#]/', $line );
	if ( count( $cols ) < 2 ) {
		// Not a data line (no property column); nothing to record
		continue;
	}

	// Ignoring non-BMP characters for the time being
	if ( preg_match( '/[a-f0-9]{5}/i', $cols[0] ) ) {
		continue;
	}

	// "0041..005A" becomes "\u0041-\u005A", "00AA" becomes "\u00AA"
	$range = '\u' . str_replace( '..', '-\u', trim( $cols[0] ) );
	$group = trim( $cols[1] );

	if ( !isset( $groups[$group] ) ) {
		$groups[$group] = '';
	}
	$groups[$group] .= $range;
}
echo "done\n";

echo 'Writing to unicodejs.wordbreak.groups.js... ';
// JSON_PRETTY_PRINT indents with four spaces; use a tab for the leading
// indentation only, so the single space after each colon is left alone
$json = preg_replace( '/^ {4}/m', "\t", json_encode( $groups, JSON_PRETTY_PRINT ) );
$written = file_put_contents(
	$target,
	"/*jshint quotmark:double */\nunicodeJS.groups = " . $json . ";\n"
);
if ( $written === false ) {
	echo "failed\n";
	exit( 1 );
}
echo "done\n";

View file

@ -0,0 +1,8 @@
/*!
* UnicodeJS namespace.
*
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
// Global namespace object; it starts out empty and the other UnicodeJS
// scripts (TextString, groups, wordbreak) attach their members to it, so
// this file has to be loaded before them.
window.unicodeJS = {};

View file

@ -0,0 +1,38 @@
/*!
* UnicodeJS TextString class.
*
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
/**
 * TextString
 *
 * Minimal read-only view over a source of plain text, addressed one
 * character position at a time. This base implementation is backed by an
 * ordinary string; a subclass can override #read to expose some richer
 * structure (for instance an array or an HTML document tree) through the
 * same interface.
 *
 * @class unicodeJS.TextString
 * @constructor
 * @param {string} text Text
 */
unicodeJS.TextString = function UnicodeJSTextString( text ) {
	this.text = text;
};

/* Methods */

/**
 * Fetch the character stored at a given position
 *
 * @method
 * @param {number} position Position to read from
 * @returns {string|null} Character, or null if out of bounds
 */
unicodeJS.TextString.prototype.read = function ( position ) {
	var outOfBounds = position < 0 || position >= this.text.length;

	return outOfBounds ? null : this.text.charAt( position );
};

View file

@ -0,0 +1,15 @@
/*jshint quotmark:double */
unicodeJS.groups = {
"CR": "\\u000D",
"LF": "\\u000A",
"Newline": "\\u000B-\\u000C\\u0085\\u2028\\u2029",
"Extend": "\\u0300-\\u036F\\u0483-\\u0487\\u0488-\\u0489\\u0591-\\u05BD\\u05BF\\u05C1-\\u05C2\\u05C4-\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7-\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07A6-\\u07B0\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0859-\\u085B\\u08E4-\\u08FE\\u0900-\\u0902\\u0903\\u093A\\u093B\\u093C\\u093E-\\u0940\\u0941-\\u0948\\u0949-\\u094C\\u094D\\u094E-\\u094F\\u0951-\\u0957\\u0962-\\u0963\\u0981\\u0982-\\u0983\\u09BC\\u09BE-\\u09C0\\u09C1-\\u09C4\\u09C7-\\u09C8\\u09CB-\\u09CC\\u09CD\\u09D7\\u09E2-\\u09E3\\u0A01-\\u0A02\\u0A03\\u0A3C\\u0A3E-\\u0A40\\u0A41-\\u0A42\\u0A47-\\u0A48\\u0A4B-\\u0A4D\\u0A51\\u0A70-\\u0A71\\u0A75\\u0A81-\\u0A82\\u0A83\\u0ABC\\u0ABE-\\u0AC0\\u0AC1-\\u0AC5\\u0AC7-\\u0AC8\\u0AC9\\u0ACB-\\u0ACC\\u0ACD\\u0AE2-\\u0AE3\\u0B01\\u0B02-\\u0B03\\u0B3C\\u0B3E\\u0B3F\\u0B40\\u0B41-\\u0B44\\u0B47-\\u0B48\\u0B4B-\\u0B4C\\u0B4D\\u0B56\\u0B57\\u0B62-\\u0B63\\u0B82\\u0BBE-\\u0BBF\\u0BC0\\u0BC1-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCC\\u0BCD\\u0BD7\\u0C01-\\u0C03\\u0C3E-\\u0C40\\u0C41-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55-\\u0C56\\u0C62-\\u0C63\\u0C82-\\u0C83\\u0CBC\\u0CBE\\u0CBF\\u0CC0-\\u0CC4\\u0CC6\\u0CC7-\\u0CC8\\u0CCA-\\u0CCB\\u0CCC-\\u0CCD\\u0CD5-\\u0CD6\\u0CE2-\\u0CE3\\u0D02-\\u0D03\\u0D3E-\\u0D40\\u0D41-\\u0D44\\u0D46-\\u0D48\\u0D4A-\\u0D4C\\u0D4D\\u0D57\\u0D62-\\u0D63\\u0D82-\\u0D83\\u0DCA\\u0DCF-\\u0DD1\\u0DD2-\\u0DD4\\u0DD6\\u0DD8-\\u0DDF\\u0DF2-\\u0DF3\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB-\\u0EBC\\u0EC8-\\u0ECD\\u0F18-\\u0F19\\u0F35\\u0F37\\u0F39\\u0F3E-\\u0F3F\\u0F71-\\u0F7E\\u0F7F\\u0F80-\\u0F84\\u0F86-\\u0F87\\u0F8D-\\u0F97\\u0F99-\\u0FBC\\u0FC6\\u102B-\\u102C\\u102D-\\u1030\\u1031\\u1032-\\u1037\\u1038\\u1039-\\u103A\\u103B-\\u103C\\u103D-\\u103E\\u1056-\\u1057\\u1058-\\u1059\\u105E-\\u1060\\u1062-\\u1064\\u1067-\\u106D\\u1071-\\u1074\\u1082\\u1083-\\u1084\\u1085-\\u1086\\u1087-\\u108C\\u108D\\u108F\\u109A-\\u10
9C\\u109D\\u135D-\\u135F\\u1712-\\u1714\\u1732-\\u1734\\u1752-\\u1753\\u1772-\\u1773\\u17B4-\\u17B5\\u17B6\\u17B7-\\u17BD\\u17BE-\\u17C5\\u17C6\\u17C7-\\u17C8\\u17C9-\\u17D3\\u17DD\\u180B-\\u180D\\u18A9\\u1920-\\u1922\\u1923-\\u1926\\u1927-\\u1928\\u1929-\\u192B\\u1930-\\u1931\\u1932\\u1933-\\u1938\\u1939-\\u193B\\u19B0-\\u19C0\\u19C8-\\u19C9\\u1A17-\\u1A18\\u1A19-\\u1A1B\\u1A55\\u1A56\\u1A57\\u1A58-\\u1A5E\\u1A60\\u1A61\\u1A62\\u1A63-\\u1A64\\u1A65-\\u1A6C\\u1A6D-\\u1A72\\u1A73-\\u1A7C\\u1A7F\\u1B00-\\u1B03\\u1B04\\u1B34\\u1B35\\u1B36-\\u1B3A\\u1B3B\\u1B3C\\u1B3D-\\u1B41\\u1B42\\u1B43-\\u1B44\\u1B6B-\\u1B73\\u1B80-\\u1B81\\u1B82\\u1BA1\\u1BA2-\\u1BA5\\u1BA6-\\u1BA7\\u1BA8-\\u1BA9\\u1BAA\\u1BAB\\u1BAC-\\u1BAD\\u1BE6\\u1BE7\\u1BE8-\\u1BE9\\u1BEA-\\u1BEC\\u1BED\\u1BEE\\u1BEF-\\u1BF1\\u1BF2-\\u1BF3\\u1C24-\\u1C2B\\u1C2C-\\u1C33\\u1C34-\\u1C35\\u1C36-\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE1\\u1CE2-\\u1CE8\\u1CED\\u1CF2-\\u1CF3\\u1CF4\\u1DC0-\\u1DE6\\u1DFC-\\u1DFF\\u200C-\\u200D\\u20D0-\\u20DC\\u20DD-\\u20E0\\u20E1\\u20E2-\\u20E4\\u20E5-\\u20F0\\u2CEF-\\u2CF1\\u2D7F\\u2DE0-\\u2DFF\\u302A-\\u302D\\u302E-\\u302F\\u3099-\\u309A\\uA66F\\uA670-\\uA672\\uA674-\\uA67D\\uA69F\\uA6F0-\\uA6F1\\uA802\\uA806\\uA80B\\uA823-\\uA824\\uA825-\\uA826\\uA827\\uA880-\\uA881\\uA8B4-\\uA8C3\\uA8C4\\uA8E0-\\uA8F1\\uA926-\\uA92D\\uA947-\\uA951\\uA952-\\uA953\\uA980-\\uA982\\uA983\\uA9B3\\uA9B4-\\uA9B5\\uA9B6-\\uA9B9\\uA9BA-\\uA9BB\\uA9BC\\uA9BD-\\uA9C0\\uAA29-\\uAA2E\\uAA2F-\\uAA30\\uAA31-\\uAA32\\uAA33-\\uAA34\\uAA35-\\uAA36\\uAA43\\uAA4C\\uAA4D\\uAA7B\\uAAB0\\uAAB2-\\uAAB4\\uAAB7-\\uAAB8\\uAABE-\\uAABF\\uAAC1\\uAAEB\\uAAEC-\\uAAED\\uAAEE-\\uAAEF\\uAAF5\\uAAF6\\uABE3-\\uABE4\\uABE5\\uABE6-\\uABE7\\uABE8\\uABE9-\\uABEA\\uABEC\\uABED\\uFB1E\\uFE00-\\uFE0F\\uFE20-\\uFE26\\uFF9E-\\uFF9F",
"Format": "\\u00AD\\u0600-\\u0604\\u06DD\\u070F\\u200E-\\u200F\\u202A-\\u202E\\u2060-\\u2064\\u206A-\\u206F\\uFEFF\\uFFF9-\\uFFFB",
"Katakana": "\\u3031-\\u3035\\u309B-\\u309C\\u30A0\\u30A1-\\u30FA\\u30FC-\\u30FE\\u30FF\\u31F0-\\u31FF\\u32D0-\\u32FE\\u3300-\\u3357\\uFF66-\\uFF6F\\uFF70\\uFF71-\\uFF9D",
"ALetter": "\\u0041-\\u005A\\u0061-\\u007A\\u00AA\\u00B5\\u00BA\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u01BA\\u01BB\\u01BC-\\u01BF\\u01C0-\\u01C3\\u01C4-\\u0293\\u0294\\u0295-\\u02AF\\u02B0-\\u02C1\\u02C6-\\u02D1\\u02E0-\\u02E4\\u02EC\\u02EE\\u0370-\\u0373\\u0374\\u0376-\\u0377\\u037A\\u037B-\\u037D\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03F5\\u03F7-\\u0481\\u048A-\\u0527\\u0531-\\u0556\\u0559\\u0561-\\u0587\\u05D0-\\u05EA\\u05F0-\\u05F2\\u05F3\\u0620-\\u063F\\u0640\\u0641-\\u064A\\u066E-\\u066F\\u0671-\\u06D3\\u06D5\\u06E5-\\u06E6\\u06EE-\\u06EF\\u06FA-\\u06FC\\u06FF\\u0710\\u0712-\\u072F\\u074D-\\u07A5\\u07B1\\u07CA-\\u07EA\\u07F4-\\u07F5\\u07FA\\u0800-\\u0815\\u081A\\u0824\\u0828\\u0840-\\u0858\\u08A0\\u08A2-\\u08AC\\u0904-\\u0939\\u093D\\u0950\\u0958-\\u0961\\u0971\\u0972-\\u0977\\u0979-\\u097F\\u0985-\\u098C\\u098F-\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BD\\u09CE\\u09DC-\\u09DD\\u09DF-\\u09E1\\u09F0-\\u09F1\\u0A05-\\u0A0A\\u0A0F-\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32-\\u0A33\\u0A35-\\u0A36\\u0A38-\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2-\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AD0\\u0AE0-\\u0AE1\\u0B05-\\u0B0C\\u0B0F-\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32-\\u0B33\\u0B35-\\u0B39\\u0B3D\\u0B5C-\\u0B5D\\u0B5F-\\u0B61\\u0B71\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99-\\u0B9A\\u0B9C\\u0B9E-\\u0B9F\\u0BA3-\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BD0\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D\\u0C58-\\u0C59\\u0C60-\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBD\\u0CDE\\u0CE0-\\u0CE1\\u0CF1-\\u0CF2\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D\\u0D4E\\u0D60-\\u0D61\\u0D7A-\\u0D7F\\u0D85-\\u0D96\\u0D9A-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0F00\\u0F40-\\u0F47\\u0F49-\\u0F6C\\u0F88-\\u0F8C\\u10A0-\\u10C5\\u10C7\\u10CD\\u10D0-\\u10FA\\u10FC\\u10FD-\
\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u1380-\\u138F\\u13A0-\\u13F4\\u1401-\\u166C\\u166F-\\u167F\\u1681-\\u169A\\u16A0-\\u16EA\\u16EE-\\u16F0\\u1700-\\u170C\\u170E-\\u1711\\u1720-\\u1731\\u1740-\\u1751\\u1760-\\u176C\\u176E-\\u1770\\u1820-\\u1842\\u1843\\u1844-\\u1877\\u1880-\\u18A8\\u18AA\\u18B0-\\u18F5\\u1900-\\u191C\\u1A00-\\u1A16\\u1B05-\\u1B33\\u1B45-\\u1B4B\\u1B83-\\u1BA0\\u1BAE-\\u1BAF\\u1BBA-\\u1BE5\\u1C00-\\u1C23\\u1C4D-\\u1C4F\\u1C5A-\\u1C77\\u1C78-\\u1C7D\\u1CE9-\\u1CEC\\u1CEE-\\u1CF1\\u1CF5-\\u1CF6\\u1D00-\\u1D2B\\u1D2C-\\u1D6A\\u1D6B-\\u1D77\\u1D78\\u1D79-\\u1D9A\\u1D9B-\\u1DBF\\u1E00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2071\\u207F\\u2090-\\u209C\\u2102\\u2107\\u210A-\\u2113\\u2115\\u2119-\\u211D\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2134\\u2135-\\u2138\\u2139\\u213C-\\u213F\\u2145-\\u2149\\u214E\\u2160-\\u2182\\u2183-\\u2184\\u2185-\\u2188\\u24B6-\\u24E9\\u2C00-\\u2C2E\\u2C30-\\u2C5E\\u2C60-\\u2C7B\\u2C7C-\\u2C7D\\u2C7E-\\u2CE4\\u2CEB-\\u2CEE\\u2CF2-\\u2CF3\\u2D00-\\u2D25\\u2D27\\u2D2D\\u2D30-\\u2D67\\u2D6F\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u2E2F\\u3005\\u303B\\u303C\\u3105-\\u312D\\u3131-\\u318E\\u31A0-\\u31BA\\uA000-\\uA014\\uA015\\uA016-\\uA48C\\uA4D0-\\uA4F7\\uA4F8-\\uA4FD\\uA500-\\uA60B\\uA60C\\uA610-\\uA61F\\uA62A-\\uA62B\\uA640-\\uA66D\\uA66E\\uA67F\\uA680-\\uA697\\uA6A0-\\uA6E5\\uA6E6-\\uA6EF\\uA717-\\uA71F\\uA722-\\uA76F\\uA770\\uA771-\\uA787\\uA788\\uA78B-\\uA78E\\uA790-\\uA793\\uA7A0-\\uA7AA\\uA7F8-\\uA7F9\\uA7FA\\uA7FB-\\uA801\\uA803-\\uA805\\uA807-
\\uA80A\\uA80C-\\uA822\\uA840-\\uA873\\uA882-\\uA8B3\\uA8F2-\\uA8F7\\uA8FB\\uA90A-\\uA925\\uA930-\\uA946\\uA960-\\uA97C\\uA984-\\uA9B2\\uA9CF\\uAA00-\\uAA28\\uAA40-\\uAA42\\uAA44-\\uAA4B\\uAAE0-\\uAAEA\\uAAF2\\uAAF3-\\uAAF4\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uABC0-\\uABE2\\uAC00-\\uD7A3\\uD7B0-\\uD7C6\\uD7CB-\\uD7FB\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D\\uFB1F-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40-\\uFB41\\uFB43-\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB\\uFE70-\\uFE74\\uFE76-\\uFEFC\\uFF21-\\uFF3A\\uFF41-\\uFF5A\\uFFA0-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC",
"MidLetter": "\\u003A\\u00B7\\u0387\\u05F4\\u2027\\uFE13\\uFE55\\uFF1A",
"MidNum": "\\u002C\\u003B\\u037E\\u0589\\u060C-\\u060D\\u066C\\u07F8\\u2044\\uFE10\\uFE14\\uFE50\\uFE54\\uFF0C\\uFF1B",
"MidNumLet": "\\u0027\\u002E\\u2018\\u2019\\u2024\\uFE52\\uFF07\\uFF0E",
"Numeric": "\\u0030-\\u0039\\u0660-\\u0669\\u066B\\u06F0-\\u06F9\\u07C0-\\u07C9\\u0966-\\u096F\\u09E6-\\u09EF\\u0A66-\\u0A6F\\u0AE6-\\u0AEF\\u0B66-\\u0B6F\\u0BE6-\\u0BEF\\u0C66-\\u0C6F\\u0CE6-\\u0CEF\\u0D66-\\u0D6F\\u0E50-\\u0E59\\u0ED0-\\u0ED9\\u0F20-\\u0F29\\u1040-\\u1049\\u1090-\\u1099\\u17E0-\\u17E9\\u1810-\\u1819\\u1946-\\u194F\\u19D0-\\u19D9\\u1A80-\\u1A89\\u1A90-\\u1A99\\u1B50-\\u1B59\\u1BB0-\\u1BB9\\u1C40-\\u1C49\\u1C50-\\u1C59\\uA620-\\uA629\\uA8D0-\\uA8D9\\uA900-\\uA909\\uA9D0-\\uA9D9\\uAA50-\\uAA59\\uABF0-\\uABF9",
"ExtendNumLet": "\\u005F\\u203F-\\u2040\\u2054\\uFE33-\\uFE34\\uFE4D-\\uFE4F\\uFF3F"
};

View file

@ -0,0 +1,160 @@
/*!
* Wordbreak module
*
* Implementation of Unicode's Default Word Boundaries
* http://www.unicode.org/reports/tr29/#Default_Word_Boundaries
*
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
( function () {
var group,
groups = unicodeJS.groups,
/**
* @class unicodeJS.wordbreak
* @singleton
*/
wordbreak = unicodeJS.wordbreak = {},
patterns = {};
// build regexes
// Each entry of unicodeJS.groups is the body of a character class (escaped
// code points and ranges), so wrapping it in [] gives a regex that matches
// any single character belonging to that group.
for ( group in groups ) {
patterns[group] = new RegExp( '[' + groups[group] + ']' );
}
/**
 * Find the word break group that a character belongs to.
 *
 * @private
 * @param {string|null} chr Character to classify, or null when there is no
 *  character (e.g. a read past the end of the text)
 * @returns {string|null} Name of the first group whose pattern matches, or
 *  null if no group matches or no character was given
 */
function getGroup( chr ) {
	var group;

	// RegExp#test converts its argument to a string, so a missing character
	// would otherwise be tested as the text "null" and classified as a letter
	if ( chr === null || chr === undefined ) {
		return null;
	}

	for ( group in patterns ) {
		if ( patterns[group].test( chr ) ) {
			return group;
		}
	}
	return null;
}
/**
 * Convenience form of #isBreakInTextString for plain strings: wraps the
 * text in a unicodeJS.TextString and checks the given position.
 * @param {string} text Text
 * @param {number} pos Character position
 * @returns {boolean} Is the position a word boundary
 */
wordbreak.isBreakInText = function ( text, pos ) {
	var textString = new unicodeJS.TextString( text );

	return unicodeJS.wordbreak.isBreakInTextString( textString, pos );
};
/**
 * Evaluates if the specified position within some text is a word boundary.
 *
 * The rules of Unicode's Default Word Boundaries (WB1 to WB14) are applied in
 * order using the group of the characters around the position. Characters
 * are fetched through string.read(), which is expected to return null where
 * there is no text; such a position is treated as an edge of the text.
 *
 * @param {unicodeJS.TextString} string Text string
 * @param {number} pos Character position
 * @returns {boolean} Is the position a word boundary
 */
wordbreak.isBreakInTextString = function ( string, pos ) {
	var chr,
		// Groups of the characters to the left/right of pos, nearest first
		lft = [],
		rgt = [],
		// How far the left/right context has been moved away from pos
		l = 0,
		r = 0;

	// Break at the start and end of text.
	// WB1: sot ÷
	// WB2: ÷ eot
	if ( string.read( pos - 1 ) === null || string.read( pos ) === null ) {
		return true;
	}

	// get some context
	rgt.push( getGroup( string.read( pos + r ) ) );
	lft.push( getGroup( string.read( pos - l - 1 ) ) );

	switch ( true ) {
		// Do not break within CRLF.
		// WB3: CR × LF
		case lft[0] === 'CR' && rgt[0] === 'LF':
			return false;
		// Otherwise break before and after Newlines (including CR and LF)
		// WB3a: (Newline | CR | LF) ÷
		case lft[0] === 'Newline' || lft[0] === 'CR' || lft[0] === 'LF':
		// WB3b: ÷ (Newline | CR | LF)
		case rgt[0] === 'Newline' || rgt[0] === 'CR' || rgt[0] === 'LF':
			return true;
	}

	// Ignore Format and Extend characters, except when they appear at the beginning of a region of text.
	// WB4: X (Extend | Format)* → X
	if ( rgt[0] === 'Extend' || rgt[0] === 'Format' ) {
		// The Extend|Format character is to the right, so it is attached
		// to a character to the left, don't split here
		return false;
	}
	// We've reached the end of an Extend|Format sequence, collapse it
	while ( lft[0] === 'Extend' || lft[0] === 'Format' ) {
		l++;
		chr = string.read( pos - l - 1 );
		if ( chr === null ) {
			// The sequence runs back to the start of the text, so there is
			// no base character for it to attach to
			return true;
		}
		// The character at index 0 is a valid base character too, so it has
		// to be read and classified like any other
		lft[0] = getGroup( chr );
	}

	// Do not break between most letters.
	// WB5: ALetter × ALetter
	if ( lft[0] === 'ALetter' && rgt[0] === 'ALetter' ) {
		return false;
	}

	// some tests beyond this point require more context
	l++;
	r++;
	rgt.push( getGroup( string.read( pos + r ) ) );
	lft.push( getGroup( string.read( pos - l - 1 ) ) );

	switch ( true ) {
		// Do not break letters across certain punctuation.
		// WB6: ALetter × (MidLetter | MidNumLet) ALetter
		case lft[0] === 'ALetter' && rgt[1] === 'ALetter' &&
			( rgt[0] === 'MidLetter' || rgt[0] === 'MidNumLet' ):
		// WB7: ALetter (MidLetter | MidNumLet) × ALetter
		case rgt[0] === 'ALetter' && lft[1] === 'ALetter' &&
			( lft[0] === 'MidLetter' || lft[0] === 'MidNumLet' ):
			return false;

		// Do not break within sequences of digits, or digits adjacent to letters (“3a”, or “A3”).
		// WB8: Numeric × Numeric
		case lft[0] === 'Numeric' && rgt[0] === 'Numeric':
		// WB9: ALetter × Numeric
		case lft[0] === 'ALetter' && rgt[0] === 'Numeric':
		// WB10: Numeric × ALetter
		case lft[0] === 'Numeric' && rgt[0] === 'ALetter':
			return false;

		// Do not break within sequences, such as “3.2” or “3,456.789”.
		// WB11: Numeric (MidNum | MidNumLet) × Numeric
		case rgt[0] === 'Numeric' && lft[1] === 'Numeric' &&
			( lft[0] === 'MidNum' || lft[0] === 'MidNumLet' ):
		// WB12: Numeric × (MidNum | MidNumLet) Numeric
		case lft[0] === 'Numeric' && rgt[1] === 'Numeric' &&
			( rgt[0] === 'MidNum' || rgt[0] === 'MidNumLet' ):
			return false;

		// Do not break between Katakana.
		// WB13: Katakana × Katakana
		case lft[0] === 'Katakana' && rgt[0] === 'Katakana':
			return false;

		// Do not break from extenders.
		// WB13a: (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
		case rgt[0] === 'ExtendNumLet' &&
			( lft[0] === 'ALetter' || lft[0] === 'Numeric' || lft[0] === 'Katakana' || lft[0] === 'ExtendNumLet' ):
		// WB13b: ExtendNumLet × (ALetter | Numeric | Katakana)
		case lft[0] === 'ExtendNumLet' &&
			( rgt[0] === 'ALetter' || rgt[0] === 'Numeric' || rgt[0] === 'Katakana' ):
			return false;

		// Do not break between regional indicator symbols.
		// WB13c: Regional_Indicator × Regional_Indicator
		case lft[0] === 'Regional_Indicator' && rgt[0] === 'Regional_Indicator':
			return false;
	}
	// Otherwise, break everywhere (including around ideographs).
	// WB14: Any ÷ Any
	return true;
};
}() );

View file

@ -0,0 +1,51 @@
/*!
* Wordbreak module tests
*
* @copyright 2013 UnicodeJS team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
QUnit.module( 'unicodeJS.wordbreak' );

// Checks every position of a sample text (including both ends) against a
// hand-written list of the positions that are expected to be word breaks.
QUnit.test( 'isBreakInText', function ( assert ) {
	var pos, expected, before, after,
		text =
			/*jshint quotmark:double */
			// 0 - 10
			"\u0300xyz'd a' " +
			// 10 - 20
			"'a a-b 1a\r" +
			// 20 - 30
			"\nカタカナ3,1.2" +
			// 30 - 40
			" a_b_3_ナ_ " +
			// 40 - 50
			"汉字/漢字 c\u0300\u0327k" +
			// 50 - 60
			" c\u0300\u0327",
			/*jshint quotmark:single */
		breaks = [
			0, 1, 6, 7, 8, 9, 10,
			11, 12, 13, 14, 15, 16, 17, 19,
			21, 25, 30,
			31, 39, 40,
			41, 42, 43, 44, 45, 46, 50,
			51, 54
		];

	QUnit.expect( text.length + 1 );

	for ( pos = 0; pos <= text.length; pos++ ) {
		expected = breaks.indexOf( pos ) > -1;
		// Up to four characters either side of the position, for the message
		before = text.substring( Math.max( pos - 4, 0 ), pos );
		after = text.substring( pos, Math.min( pos + 4, text.length ) );

		assert.equal(
			unicodeJS.wordbreak.isBreakInText( text, pos ),
			expected,
			'Position ' + pos + ' is ' + ( expected ? '' : 'not ' ) + 'a break: ' + before + '│' + after
		);
	}
});

View file

@ -0,0 +1,36 @@
/*!
* VisualEditor DataString class.
*
* @copyright 2011-2013 VisualEditor Team and others; see AUTHORS.txt
* @license The MIT License (MIT); see LICENSE.txt
*/
/**
 * Adapter that lets document data be read one character at a time through
 * the unicodeJS.TextString interface.
 * @class
 * @extends unicodeJS.TextString
 * @constructor
 * @param {Array} data Document data
 */
ve.dm.DataString = function VeDmDataString( data ) {
	this.data = data;
};

/* Inheritance */

ve.inheritClass( ve.dm.DataString, unicodeJS.TextString );

/**
 * Fetch the character held at a given position of the data.
 * @param {number} position Position in data to read from
 * @returns {string|null} Character at position, or null if not text
 */
ve.dm.DataString.prototype.read = function ( position ) {
	var item = this.data[position];

	// Nothing stored at this position, or the item carries a type (an element)
	if ( item === undefined || item.type !== undefined ) {
		return null;
	}
	// A plain string is the character itself
	if ( typeof item === 'string' ) {
		return item;
	}
	// Otherwise the character is the first entry of the item
	return item[0];
};

View file

@ -1033,43 +1033,57 @@ ve.dm.Document.prototype.getNearestStructuralOffset = function ( offset, directi
};
/**
* Get the nearest word boundary.
* Get the nearest word boundaries as a range.
*
* The offset will first be moved to the nearest content offset if it's not at one already. If a
* direction was given, the boundary will be found in that direction, otherwise both directions will
* be calculated and the one with the lowest distance from offset will be returned. Elements are
* always word boundaries. For more information about what is considered to be a word character,
* see {ve.dm.SurfaceFragment.wordPattern}.
* The offset will first be moved to the nearest content offset if it's not at one already.
* Elements are always word boundaries.
*
* @method
* @param {number} offset Offset to start from
* @param {number} [direction] Direction to prefer matching offset in, -1 for left and 1 for right
* @returns {number} Nearest word boundary
* @returns {ve.Range} Range around nearest word boundaries
*/
ve.dm.Document.prototype.getNearestWordBoundary = function ( offset, direction ) {
var left, right, i, inc,
pattern = ve.dm.SurfaceFragment.static.wordBoundaryPattern,
data = this.data;
ve.dm.Document.prototype.getNearestWordRange = function ( offset ) {
var offsetLeft, offsetRight, i,
dataString = new ve.dm.DataString( this.data );
offset = this.getNearestContentOffset( offset );
if ( !direction ) {
left = this.getNearestWordBoundary( offset, -1 );
right = this.getNearestWordBoundary( offset, +1 );
return offset - left < right - offset ? left : right;
} else {
inc = direction > 0 ? 1 : -1;
i = offset + ( inc > 0 ? 0 : -1 );
do {
if ( data[i].type === undefined ) {
// Plain text extraction
if ( pattern.test( typeof data[i] === 'string' ? data[i] : data[i][0] ) ) {
break;
}
// If the cursor offset is a break (i.e. the start/end of word) we should
// check one position either side to see if there is a non-break
// and if so, move the offset accordingly
if( unicodeJS.wordbreak.isBreakInTextString( dataString, offset ) ) {
if ( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset + 1 ) ) {
offset++;
} else if( !unicodeJS.wordbreak.isBreakInTextString( dataString, offset - 1 ) ) {
offset--;
} else {
// just return one character to the right, unless we are at the end
// of the text, in which case the character to the left
if( dataString.read( offset ) !== null ) {
return new ve.Range( offset, offset + 1 );
} else {
break;
return new ve.Range( offset - 1, offset );
}
} while ( data[i += inc] );
return i + ( inc > 0 ? 0 : 1 );
}
}
i = offset;
// Search left and right for next break points
while( dataString.read( i++ ) !== null ) {
offsetRight = i;
if( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
break;
}
}
i = offset;
while( dataString.read( i-- ) !== null ) {
offsetLeft = i;
if( unicodeJS.wordbreak.isBreakInTextString( dataString, i ) ) {
break;
}
}
return new ve.Range( offsetLeft, offsetRight );
};
/**

View file

@ -216,7 +216,7 @@ ve.dm.SurfaceFragment.prototype.trimRange = function () {
*
* @method
* @param {string} [scope='parent'] Method of expansion:
* - `word`: Expands to cover the nearest word by looking for word boundary characters
* - `word`: Expands to cover the nearest word by looking for word breaks (see unicodeJS.wordbreak)
* - `annotation`: Expands to cover a given annotation (argument) within the current range
* - `root`: Expands to cover the entire document
* - `siblings`: Expands to cover all sibling nodes
@ -233,10 +233,18 @@ ve.dm.SurfaceFragment.prototype.expandRange = function ( scope, type ) {
var range, node, nodes, parent;
switch ( scope || 'parent' ) {
case 'word':
range = new ve.Range(
this.document.getNearestWordBoundary( this.range.start, -1 ),
this.document.getNearestWordBoundary( this.range.end, 1 )
);
if( this.range.getLength() > 0 ) {
range = ve.Range.newCoveringRange( [
this.document.getNearestWordRange( this.range.start ),
this.document.getNearestWordRange( this.range.end )
] );
if( this.range.isBackwards() ) {
range = range.flip();
}
} else {
// optimisation for zero-length ranges
range = this.document.getNearestWordRange( this.range.start );
}
break;
case 'annotation':
range = this.document.getAnnotatedRangeFromSelection( this.range, type );

View file

@ -1332,8 +1332,8 @@ QUnit.test( 'getNearestStructuralOffset', function ( assert ) {
}
} );
QUnit.test( 'getNearestWordBoundary', function ( assert ) {
var i, doc, left, right, word,
QUnit.test( 'getNearestWordRange', function ( assert ) {
var i, doc, range, word,
cases = [
{
'phrase': 'visual editor test',
@ -1341,23 +1341,59 @@ QUnit.test( 'getNearestWordBoundary', function ( assert ) {
'offset': 10,
'expected': 'editor'
},
{
'phrase': 'visual editor test',
'msg': 'cursor at start of word',
'offset': 7,
'expected': 'editor'
},
{
'phrase': 'visual editor test',
'msg': 'cursor at end of word',
'offset': 13,
'expected': 'editor'
},
{
'phrase': 'visual editor test',
'msg': 'cursor at start of text',
'offset': 0,
'expected': 'visual'
},
{
'phrase': 'visual editor test',
'msg': 'cursor at end of text',
'offset': 18,
'expected': 'test'
},
{
'phrase': 'Computer-aided design',
'msg': 'hyphenated Latin word',
'offset': 2,
'expected': 'Computer-aided'
'offset': 12,
'expected': 'aided'
},
{
'phrase': 'Water (l\'eau) is',
'msg': 'apostrophe and parentheses (Latin)',
'offset': 8,
'expected': '(l\'eau)'
'expected': 'l\'eau'
},
{
'phrase': 'Water (H2O) is',
'msg': 'number in word (Latin)',
'offset': 9,
'expected': '(H2O)'
'expected': 'H2O'
},
{
'phrase': 'The \'word\' is',
'msg': 'apostrophes as single quotes',
'offset': 7,
'expected': 'word'
},
{
'phrase': 'Some "double" quotes',
'msg': 'double quotes',
'offset': 8,
'expected': 'double'
},
{
'phrase': 'Wikipédia l\'encyclopédie libre',
@ -1365,6 +1401,12 @@ QUnit.test( 'getNearestWordBoundary', function ( assert ) {
'offset': 15,
'expected': 'l\'encyclopédie'
},
{
'phrase': 'Wikipédia l\'encyclopédie libre',
'msg': 'Extend characters (i.e. letter + accent)',
'offset': 15,
'expected': 'l\'encyclopédie'
},
{
'phrase': 'Википедия свободная энциклопедия',
'msg': 'Cyrillic word',
@ -1388,15 +1430,49 @@ QUnit.test( 'getNearestWordBoundary', function ( assert ) {
'msg': 'Eastern Arabic numerals',
'offset': 13,
'expected': '٠١٢٣٤٥٦٧٨٩'
},
{
'phrase': 'Latinカタカナwrapped',
'msg': 'Latin-wrapped Katakana word',
'offset': 7,
'expected': 'カタカナ'
},
{
'phrase': '维基百科',
'msg': 'Hanzi characters (cursor in middle)',
'offset': 2,
'expected': '百'
},
{
'phrase': '维基百科',
'msg': 'Hanzi characters (cursor at end)',
'offset': 4,
'expected': '科'
},
{
'phrase': 'Costs £1,234.00 each',
'msg': 'formatted number sequence',
'offset': 11,
'expected': '1,234.00'
},
{
'phrase': 'Reset index_of variable',
'msg': 'underscore-joined word',
'offset': 8,
'expected': 'index_of'
}
];
QUnit.expect( cases.length );
for ( i = 0; i < cases.length; i++ ) {
doc = new ve.dm.Document( cases[i].phrase.split('') );
left = doc.getNearestWordBoundary( cases[i].offset, -1 );
right = doc.getNearestWordBoundary( cases[i].offset, 1 );
word = cases[i].phrase.substring( left, right );
assert.strictEqual( word, cases[i].expected, cases[i].msg );
range = doc.getNearestWordRange( cases[i].offset );
word = cases[i].phrase.substring( range.start, range.end );
assert.strictEqual( word, cases[i].expected,
cases[i].msg + ': ' +
cases[i].phrase.substring( 0, cases[i].offset ) + '│' +
cases[i].phrase.substring( cases[i].offset, cases[i].phrase.length ) +
' → ' + cases[i].expected
);
}
} );

View file

@ -59,7 +59,7 @@ QUnit.test( 'collapseRange', 3, function ( assert ) {
assert.deepEqual( collapsedFragment.getRange(), new ve.Range( 20, 20 ), 'new range is used' );
} );
QUnit.test( 'expandRange', 1, function ( assert ) {
QUnit.test( 'expandRange (closest)', 1, function ( assert ) {
var doc = new ve.dm.Document( ve.copyArray( ve.dm.example.data ) ),
surface = new ve.dm.Surface( doc ),
fragment = new ve.dm.SurfaceFragment( surface, new ve.Range( 20, 21 ) );
@ -70,6 +70,40 @@ QUnit.test( 'expandRange', 1, function ( assert ) {
);
} );
// Expanding a fragment by 'word' should cover whole words at both ends of
// the range and keep the direction of the original range.
QUnit.test( 'expandRange (word)', function ( assert ) {
	var i, doc, surface, fragment, newFragment, range, word, cases = [
		{
			phrase: 'the quick brown fox',
			range: new ve.Range( 6, 13 ),
			expected: 'quick brown',
			msg: 'range starting and ending in latin words'
		},
		{
			phrase: 'the quick brown fox',
			range: new ve.Range( 18, 12 ),
			expected: 'brown fox',
			msg: 'backwards range starting and ending in latin words'
		},
		{
			phrase: 'the quick brown fox',
			range: new ve.Range( 7, 7 ),
			expected: 'quick',
			msg: 'zero-length range'
		}
	];
	// Two assertions per case: the covered text and the range direction
	QUnit.expect( cases.length * 2 );
	for ( i = 0; i < cases.length; i++ ) {
		doc = new ve.dm.Document( cases[i].phrase.split('') );
		surface = new ve.dm.Surface( doc );
		fragment = new ve.dm.SurfaceFragment( surface, cases[i].range );
		newFragment = fragment.expandRange( 'word' );
		range = newFragment.getRange();
		word = cases[i].phrase.substring( range.start, range.end );
		assert.strictEqual( word, cases[i].expected, cases[i].msg + ': text' );
		// Actual value first, expected second, so a failure is reported the right way round
		assert.strictEqual( range.isBackwards(), cases[i].range.isBackwards(), cases[i].msg + ': range direction' );
	}
} );
QUnit.test( 'removeContent', 2, function ( assert ) {
var doc = new ve.dm.Document( ve.copyArray( ve.dm.example.data ) ),
surface = new ve.dm.Surface( doc ),

View file

@ -13,6 +13,10 @@
<script src="../../jquery/jquery.js"></script>
<script src="../../rangy/rangy-core.js"></script>
<script src="../../rangy/rangy-position.js"></script>
<script src="../../unicodejs/unicodejs.js"></script>
<script src="../../unicodejs/unicodejs.textstring.js"></script>
<script src="../../unicodejs/unicodejs.wordbreak.groups.js"></script>
<script src="../../unicodejs/unicodejs.wordbreak.js"></script>
<!-- ext.visualEditor.base -->
<script src="../../ve/ve.js"></script>
<script src="../../ve/ve.EventEmitter.js"></script>
@ -70,6 +74,7 @@
<script src="../../ve/dm/ve.dm.Transaction.js"></script>
<script src="../../ve/dm/ve.dm.Surface.js"></script>
<script src="../../ve/dm/ve.dm.SurfaceFragment.js"></script>
<script src="../../ve/dm/ve.dm.DataString.js"></script>
<script src="../../ve/dm/ve.dm.Document.js"></script>
<script src="../../ve/dm/ve.dm.DocumentSlice.js"></script>
<script src="../../ve/dm/ve.dm.DocumentSynchronizer.js"></script>