mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-13 17:48:17 +00:00
Merge "Revert model to use simple UTF-16 code units"
This commit is contained in:
commit
91f00b0eaf
|
@ -1,6 +0,0 @@
|
||||||
<p>12𨋢456789𨋢bc</p>
|
|
||||||
<p>「𨋢」字響<tt>香港</tt>衍生出好多新詞,好似:𨋢<tt>香港</tt> abc</p>
|
|
||||||
<p>abc</p>
|
|
||||||
<p>one c̀ombining accent</p>
|
|
||||||
<p>two ç̀ombining accents</p>
|
|
||||||
<p>def</p>
|
|
|
@ -24,7 +24,7 @@ for breaktype in ['Grapheme', 'Word']:
|
||||||
if not m:
|
if not m:
|
||||||
raise ValueError( "Bad line: %r" % line )
|
raise ValueError( "Bad line: %r" % line )
|
||||||
start, end, prop = m.groups()
|
start, end, prop = m.groups()
|
||||||
if start == 'D800' and end == 'DFFF':
|
if breaktype == 'Grapheme' and start == 'D800' and end == 'DFFF':
|
||||||
continue # raw surrogates are not treated
|
continue # raw surrogates are not treated
|
||||||
|
|
||||||
if not ranges.has_key( prop ):
|
if not ranges.has_key( prop ):
|
||||||
|
|
|
@ -179,7 +179,6 @@
|
||||||
throw new Error( 'range includes surrogates: ' +
|
throw new Error( 'range includes surrogates: ' +
|
||||||
min.toString( 16 ) + '-' + max.toString( 16 ) );
|
min.toString( 16 ) + '-' + max.toString( 16 ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( max <= 0xFFFF ) {
|
if ( max <= 0xFFFF ) {
|
||||||
// interval is entirely BMP
|
// interval is entirely BMP
|
||||||
characterClass.push( codeUnitRange( min, max ) );
|
characterClass.push( codeUnitRange( min, max ) );
|
||||||
|
|
|
@ -148,12 +148,11 @@ ve.ce.Document.prototype.getNodeAndOffset = function ( offset ) {
|
||||||
}
|
}
|
||||||
item = current[0][current[1]];
|
item = current[0][current[1]];
|
||||||
if ( item.nodeType === Node.TEXT_NODE ) {
|
if ( item.nodeType === Node.TEXT_NODE ) {
|
||||||
// offset, startOffset and length are all data model lengths (not byte lengths)
|
length = item.textContent.length;
|
||||||
length = ve.getClusterOffset( item.textContent, item.textContent.length );
|
|
||||||
if ( offset >= startOffset && offset <= startOffset + length ) {
|
if ( offset >= startOffset && offset <= startOffset + length ) {
|
||||||
return {
|
return {
|
||||||
node: item,
|
node: item,
|
||||||
offset: ve.getByteOffset( item.textContent, offset - startOffset )
|
offset: offset - startOffset
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
startOffset += length;
|
startOffset += length;
|
||||||
|
|
|
@ -157,11 +157,10 @@ ve.ce.getOffsetFromTextNode = function ( domNode, domOffset ) {
|
||||||
item = current[0][current[1]];
|
item = current[0][current[1]];
|
||||||
if ( item.nodeType === Node.TEXT_NODE ) {
|
if ( item.nodeType === Node.TEXT_NODE ) {
|
||||||
if ( item === domNode ) {
|
if ( item === domNode ) {
|
||||||
// domOffset is a byte offset, convert it to a grapheme cluster offset
|
offset += domOffset;
|
||||||
offset += ve.getClusterOffset( item.textContent, domOffset );
|
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
offset += ve.getClusterOffset( item.textContent, item.textContent.length );
|
offset += item.textContent.length;
|
||||||
}
|
}
|
||||||
} else if ( item.nodeType === Node.ELEMENT_NODE ) {
|
} else if ( item.nodeType === Node.ELEMENT_NODE ) {
|
||||||
$item = current[0].eq( current[1] );
|
$item = current[0].eq( current[1] );
|
||||||
|
|
|
@ -260,13 +260,11 @@ QUnit.test( 'createDocumentFromHtml', function ( assert ) {
|
||||||
}
|
}
|
||||||
} );
|
} );
|
||||||
|
|
||||||
// ve.splitClusters: Tested upstream (UnicodeJS)
|
|
||||||
|
|
||||||
// TODO: ve.isUnattachedCombiningMark
|
// TODO: ve.isUnattachedCombiningMark
|
||||||
|
|
||||||
// TODO: ve.getByteOffset
|
// TODO: ve.getByteOffset
|
||||||
|
|
||||||
// TODO: ve.getCharacterOffset
|
// TODO: ve.getClusterOffset
|
||||||
|
|
||||||
QUnit.test( 'graphemeSafeSubstring', function ( assert ) {
|
QUnit.test( 'graphemeSafeSubstring', function ( assert ) {
|
||||||
var i, text = '12\ud860\udee245\ud860\udee2789\ud860\udee2bc', cases = [
|
var i, text = '12\ud860\udee245\ud860\udee2789\ud860\udee2bc', cases = [
|
||||||
|
|
|
@ -403,12 +403,14 @@
|
||||||
return ve.init.platform.getMessage.apply( ve.init.platform, arguments );
|
return ve.init.platform.getMessage.apply( ve.init.platform, arguments );
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @method
|
* Compatibility method. We no longer split into clusters at this level.
|
||||||
* @inheritdoc unicodeJS.graphemebreak#splitClusters
|
*
|
||||||
* @see unicodeJS.graphemebreak#splitClusters
|
* TODO: strip out calls to splitClusters then delete this method.
|
||||||
*/
|
*/
|
||||||
ve.splitClusters = unicodeJS.graphemebreak.splitClusters;
|
ve.splitClusters = function ( text ) {
|
||||||
|
return text.split( '' );
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determine if the text consists of only unattached combining marks.
|
* Determine if the text consists of only unattached combining marks.
|
||||||
|
@ -428,7 +430,8 @@
|
||||||
* @returns {number} Byte offset
|
* @returns {number} Byte offset
|
||||||
*/
|
*/
|
||||||
ve.getByteOffset = function ( text, clusterOffset ) {
|
ve.getByteOffset = function ( text, clusterOffset ) {
|
||||||
return ve.splitClusters( text ).slice( 0, clusterOffset ).join( '' ).length;
|
return unicodeJS.graphemebreak.splitClusters( text ).slice( 0, clusterOffset
|
||||||
|
).join( '' ).length;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -439,7 +442,8 @@
|
||||||
* @returns {number} Grapheme cluster offset
|
* @returns {number} Grapheme cluster offset
|
||||||
*/
|
*/
|
||||||
ve.getClusterOffset = function ( text, byteOffset ) {
|
ve.getClusterOffset = function ( text, byteOffset ) {
|
||||||
return ve.splitClusters( text.substring( 0, byteOffset ) ).length;
|
return unicodeJS.graphemebreak.splitClusters( text.substring( 0, byteOffset
|
||||||
|
) ).length;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in a new issue