From d8314539daa86b000de0ad603b967e91563a07f8 Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Wed, 3 Jul 2013 11:41:25 -0400 Subject: [PATCH] Fix mw.ustring edge cases A few edge cases were being incorrectly handled: * mw.ustring.sub( 'abc', 1, 0 ) returned 'a', not ''. * mw.ustring.codepoint( 'abc', 1, 0 ) returned 97, not no results. * mw.ustring.codepoint( 'abc', 4, 4 ) returned 99, not no results. * mw.ustring.gcodepoint had the same issues as mw.ustring.codepoint. Change-Id: Ib8c0ef5a8073106eb7d90d0aa0513be4525dca08 --- engines/LuaCommon/UstringLibrary.php | 10 ++++-- engines/LuaCommon/lualib/ustring/ustring.lua | 10 ++++-- .../engines/LuaCommon/UstringLibraryTests.lua | 36 ++++++++++++++++++- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/engines/LuaCommon/UstringLibrary.php b/engines/LuaCommon/UstringLibrary.php index fddffa83..3c164864 100644 --- a/engines/LuaCommon/UstringLibrary.php +++ b/engines/LuaCommon/UstringLibrary.php @@ -137,8 +137,11 @@ class Scribunto_LuaUstringLibrary extends Scribunto_LuaLibraryBase { if ( $j < 0 ) { $j = $l + $j + 1; } - $i = max( 1, min( $i, $l ) ); - $j = max( 1, min( $j, $l ) ); + if ( $j < $i ) { + return array(); + } + $i = max( 1, min( $i, $l + 1 ) ); + $j = max( 1, min( $j, $l + 1 ) ); $s = mb_substr( $s, $i - 1, $j - $i + 1, 'UTF-8' ); return unpack( 'N*', mb_convert_encoding( $s, 'UTF-32BE', 'UTF-8' ) ); } @@ -203,6 +206,9 @@ class Scribunto_LuaUstringLibrary extends Scribunto_LuaLibraryBase { if ( $j < 0 ) { $j = $len + $j + 1; } + if ( $j < $i ) { + return array( '' ); + } $i = max( 1, min( $i, $len + 1 ) ); $j = max( 1, min( $j, $len + 1 ) ); $s = mb_substr( $s, $i - 1, $j - $i + 1, 'UTF-8' ); diff --git a/engines/LuaCommon/lualib/ustring/ustring.lua b/engines/LuaCommon/lualib/ustring/ustring.lua index 87f3b4ab..55766243 100644 --- a/engines/LuaCommon/lualib/ustring/ustring.lua +++ b/engines/LuaCommon/lualib/ustring/ustring.lua @@ -241,8 +241,11 @@ function ustring.codepoint( s, i, j ) if j < 0 then j = cps.len + j + 1 end - i = math.max( 1, math.min( i, cps.len ) ) - j = math.max( 1, math.min( j, cps.len ) ) + if j < i then + return -- empty result set + end + i = math.max( 1, math.min( i, cps.len + 1 ) ) + j = math.max( 1, math.min( j, cps.len + 1 ) ) return unpack( cps.codepoints, i, j ) end @@ -352,6 +355,9 @@ function ustring.sub( s, i, j ) if j < 0 then j = cps.len + j + 1 end + if j < i then + return '' + end i = math.max( 1, math.min( i, cps.len + 1 ) ) j = math.max( 1, math.min( j, cps.len + 1 ) ) return sub( s, cps, i, j ) diff --git a/tests/engines/LuaCommon/UstringLibraryTests.lua b/tests/engines/LuaCommon/UstringLibraryTests.lua index c0e57600..4b0c0391 100644 --- a/tests/engines/LuaCommon/UstringLibraryTests.lua +++ b/tests/engines/LuaCommon/UstringLibraryTests.lua @@ -91,11 +91,22 @@ return testframework.getTestProvider( { args = { str1, 1, -1 }, expect = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff } }, - { name = 'codepoint: substring', func = mw.ustring.codepoint, args = { str1, 5, -2 }, expect = { 0x800, 0xffff, 0x10000 } }, + { name = 'codepoint: (5,4)', func = mw.ustring.codepoint, + args = { str1, 5, 4 }, + expect = {} + }, + { name = 'codepoint: (1,0)', func = mw.ustring.codepoint, + args = { str1, 1, 0 }, + expect = {} + }, + { name = 'codepoint: (9,9)', func = mw.ustring.codepoint, + args = { str1, 9, 9 }, + expect = {} + }, { name = 'char: basic test', func = mw.ustring.char, args = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff }, @@ -151,10 +162,18 @@ return testframework.getTestProvider( { args = { str1, 4, 3 }, expect = { "" } }, + { name = 'sub: (1,0)', func = mw.ustring.sub, + args = { str2, 1, 0 }, + expect = { "" } + }, { name = 'sub: (5,5)', func = mw.ustring.sub, args = { str1, 5, 5 }, expect = { "\224\160\128" } }, + { name = 'sub: (9,9)', func = mw.ustring.sub, + args = { str1, 9, 9 }, + expect = { "" } + }, { name = 'sub: empty string', func = mw.ustring.sub, args = { '', 5 }, expect = { "" } @@ -456,6 +475,21 @@ return testframework.getTestProvider( { expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 } }, type = 'Iterator' }, + { name = 'gcodepoint: (4, 3)', func = mw.ustring.gcodepoint, + args = { str1, 4, 3 }, + expect = {}, + type = 'Iterator' + }, + { name = 'gcodepoint: (1, 0)', func = mw.ustring.gcodepoint, + args = { str1, 1, 0 }, + expect = {}, + type = 'Iterator' + }, + { name = 'gcodepoint: (9, 9)', func = mw.ustring.gcodepoint, + args = { str1, 9, 9 }, + expect = {}, + type = 'Iterator' + }, { name = 'gmatch: test string 1', func = mw.ustring.gmatch, args = { str2, 'f%a+' },