ustring: Match undocumented string.gsub behavior

As documented, string.gsub( 'foo', '%a', '%1' ) should raise an invalid
capture index error because there is no capture with index 1 in the
pattern. But in fact it treats %1 as %0 in this situation. The ustring
library should match this behavior.

This patch also adds some tests for the behavior of gsub with table and
function replacements when the pattern does have captures.

Bug: T207623
Change-Id: Ie3e6c2eafa4a05989815c62c7037167642581751
This commit is contained in:
Brad Jorsch 2018-10-22 10:49:17 -04:00 committed by jenkins-bot
parent fa7481b6f6
commit 18c08c23fc
3 changed files with 30 additions and 6 deletions

View file

@ -664,6 +664,9 @@ class Scribunto_LuaUstringLibrary extends Scribunto_LuaLibraryBase {
return $m[0];
} elseif ( isset( $m["m$x"] ) ) {
return $m["m$x"];
} elseif ( $x === '1' ) {
// Match undocumented Lua string.gsub behavior
return $m[0];
} else {
throw new Scribunto_LuaError( "invalid capture index %$x in replacement string" );
}

View file

@ -1002,15 +1002,20 @@ function ustring.gsub( s, pattern, repl, n )
ret[#ret + 1] = sub( s, cps, init, m[1] - 1 )
end
local mm = sub( s, cps, m[1], m[2] )
-- This simplifies the code for the function and table cases (tp == 1 and tp == 2) when there are
-- no captures in the pattern. As documented it would be incorrect for the string case by making
-- %1 act like %0 instead of raising an "invalid capture index" error, but Lua in fact does
-- exactly that for string.gsub.
if #m < 3 then
m[3] = mm
end
local val, valType
if tp == 1 then
if m[3] then
val = repl( unpack( m, 3 ) )
else
val = repl( mm )
end
elseif tp == 2 then
val = repl[m[3] or mm]
val = repl[m[3]]
elseif tp == 3 then
if ct == 0 and #m < 11 then
local ss = S.gsub( repl, '%%[%%0-' .. ( #m - 2 ) .. ']', 'x' )

View file

@ -572,6 +572,10 @@ return testframework.getTestProvider( {
args = { 'foo; fóó', '(f)(%a+)', '%%0=%0 %%1=%1 %%2=%2' },
expect = { '%0=foo %1=f %2=oo; %0=fóó %1=f %2=óó', 2 }
},
{ name = 'gsub: string, undocumented behavior where %1 works as %0 if there are no captures', func = mw.ustring.gsub,
args = { 'foo; fóó', '%a+', '%1!' },
expect = { 'foo!; fóó!', 2 }
},
{ name = 'gsub: (anchored)', func = mw.ustring.gsub,
args = { 'foofoofoo foo', '^foo', 'X' },
expect = { 'Xfoofoo foo', 1 }
@ -588,6 +592,14 @@ return testframework.getTestProvider( {
args = { str2, 'f%a+', { ['főó'] = 'Y', ['foó'] = 'Z' }, 1 },
expect = { str2, 1 }
},
{ name = 'gsub: (table 4)', func = mw.ustring.gsub,
args = { str3, 'f(%a+)', { oo = 'X', ['őó'] = 'Y', [''] = 'Z' } },
expect = { '??? X bar Y Z baz foooo foofoo fo ok?', 6 }
},
{ name = 'gsub: (table 5)', func = mw.ustring.gsub,
args = { str3, '(f)(%a+)', { f = 'F', oo = 'X', ['őó'] = 'Y', [''] = 'Z' } },
expect = { '??? F bar F F baz F F F ok?', 6 }
},
{ name = 'gsub: (inverted zero character class)', func = mw.ustring.gsub,
args = { "ó", '%Z', 'repl' },
expect = { 'repl', 1 }
@ -624,6 +636,10 @@ return testframework.getTestProvider( {
args = { str3, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end },
expect = { '??? -FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo ok?', 6 }
},
{ name = 'gsub: (function 3)', func = mw.ustring.gsub,
args = { str3, '(f)(%a+)', function(m1, m2) if m2 == 'o' then return nil end return '-' .. m1 .. mw.ustring.upper(m2) .. '-' end },
expect = { '??? -fOO- bar -fŐÓ- -fOÓ- baz -fOOOO- -fOOFOO- fo ok?', 6 }
},
{ name = 'gsub: invalid replacement string', func = mw.ustring.gsub,
args = { 'foo; fóó', '(%a+)', '%2' },
expect = "invalid capture index %2 in replacement string"