Fix mw.ustring edge cases

A few edge cases were being incorrectly handled:
* mw.ustring.sub( 'abc', 1, 0 ) returned 'a' instead of ''.
* mw.ustring.codepoint( 'abc', 1, 0 ) returned 97 instead of no results.
* mw.ustring.codepoint( 'abc', 4, 4 ) returned 99 instead of no results.
* mw.ustring.gcodepoint had the same issues as mw.ustring.codepoint.

Change-Id: Ib8c0ef5a8073106eb7d90d0aa0513be4525dca08
This commit is contained in:
Brad Jorsch 2013-07-03 11:41:25 -04:00
parent ad3e5a7c48
commit d8314539da
3 changed files with 51 additions and 5 deletions

View file

@ -137,8 +137,11 @@ class Scribunto_LuaUstringLibrary extends Scribunto_LuaLibraryBase {
if ( $j < 0 ) {
$j = $l + $j + 1;
}
$i = max( 1, min( $i, $l ) );
$j = max( 1, min( $j, $l ) );
if ( $j < $i ) {
return array();
}
$i = max( 1, min( $i, $l + 1 ) );
$j = max( 1, min( $j, $l + 1 ) );
$s = mb_substr( $s, $i - 1, $j - $i + 1, 'UTF-8' );
return unpack( 'N*', mb_convert_encoding( $s, 'UTF-32BE', 'UTF-8' ) );
}
@ -203,6 +206,9 @@ class Scribunto_LuaUstringLibrary extends Scribunto_LuaLibraryBase {
if ( $j < 0 ) {
$j = $len + $j + 1;
}
if ( $j < $i ) {
return array( '' );
}
$i = max( 1, min( $i, $len + 1 ) );
$j = max( 1, min( $j, $len + 1 ) );
$s = mb_substr( $s, $i - 1, $j - $i + 1, 'UTF-8' );

View file

@ -241,8 +241,11 @@ function ustring.codepoint( s, i, j )
if j < 0 then
j = cps.len + j + 1
end
i = math.max( 1, math.min( i, cps.len ) )
j = math.max( 1, math.min( j, cps.len ) )
if j < i then
return -- empty result set
end
i = math.max( 1, math.min( i, cps.len + 1 ) )
j = math.max( 1, math.min( j, cps.len + 1 ) )
return unpack( cps.codepoints, i, j )
end
@ -352,6 +355,9 @@ function ustring.sub( s, i, j )
if j < 0 then
j = cps.len + j + 1
end
if j < i then
return ''
end
i = math.max( 1, math.min( i, cps.len + 1 ) )
j = math.max( 1, math.min( j, cps.len + 1 ) )
return sub( s, cps, i, j )

View file

@ -91,11 +91,22 @@ return testframework.getTestProvider( {
args = { str1, 1, -1 },
expect = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff }
},
{ name = 'codepoint: substring', func = mw.ustring.codepoint,
args = { str1, 5, -2 },
expect = { 0x800, 0xffff, 0x10000 }
},
{ name = 'codepoint: (5,4)', func = mw.ustring.codepoint,
args = { str1, 5, 4 },
expect = {}
},
{ name = 'codepoint: (1,0)', func = mw.ustring.codepoint,
args = { str1, 1, 0 },
expect = {}
},
{ name = 'codepoint: (9,9)', func = mw.ustring.codepoint,
args = { str1, 9, 9 },
expect = {}
},
{ name = 'char: basic test', func = mw.ustring.char,
args = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff },
@ -151,10 +162,18 @@ return testframework.getTestProvider( {
args = { str1, 4, 3 },
expect = { "" }
},
{ name = 'sub: (1,0)', func = mw.ustring.sub,
args = { str2, 1, 0 },
expect = { "" }
},
{ name = 'sub: (5,5)', func = mw.ustring.sub,
args = { str1, 5, 5 },
expect = { "\224\160\128" }
},
{ name = 'sub: (9,9)', func = mw.ustring.sub,
args = { str1, 9, 9 },
expect = { "" }
},
{ name = 'sub: empty string', func = mw.ustring.sub,
args = { '', 5 },
expect = { "" }
@ -456,6 +475,21 @@ return testframework.getTestProvider( {
expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 } },
type = 'Iterator'
},
{ name = 'gcodepoint: (4, 3)', func = mw.ustring.gcodepoint,
args = { str1, 4, 3 },
expect = {},
type = 'Iterator'
},
{ name = 'gcodepoint: (1, 0)', func = mw.ustring.gcodepoint,
args = { str1, 1, 0 },
expect = {},
type = 'Iterator'
},
{ name = 'gcodepoint: (9, 9)', func = mw.ustring.gcodepoint,
args = { str1, 9, 9 },
expect = {},
type = 'Iterator'
},
{ name = 'gmatch: test string 1', func = mw.ustring.gmatch,
args = { str2, 'f%a+' },