diff --git a/engines/LuaCommon/lualib/ustring/ustring.lua b/engines/LuaCommon/lualib/ustring/ustring.lua index da757e8b..c6462177 100644 --- a/engines/LuaCommon/lualib/ustring/ustring.lua +++ b/engines/LuaCommon/lualib/ustring/ustring.lua @@ -740,11 +740,10 @@ end -- inside brackets and aren't followed by quantifiers and aren't part of a -- '%b', but that's too complicated to check. -- * If it contains a negated character set. --- * If it contains "%a" or any of the other %-prefixed character sets except --- %z or %Z. --- * If it contains a '.' not followed by '*', '+', or '-'. A bare '.' or '.?' --- would try to match a partial UTF-8 character, but the others will happily --- enough match a whole character thinking it's 2 or 4. +-- * If it contains "%a" or any of the other %-prefixed character sets except %z. +-- * If it contains a '.' not followed by '*', '+', or '-'. A bare '.' or '.?' +-- matches a partial UTF-8 character, but the others will happily enough +-- match a whole UTF-8 character, treating it as 2, 3 or 4 single-byte characters. -- * If it contains position-captures. 
-- -- @param string pattern @@ -753,8 +752,8 @@ local function patternIsSimple( pattern ) return not ( S.find( pattern, '[\128-\255]' ) or S.find( pattern, '%[%^' ) or - S.find( pattern, '%%[acdlpsuwxACDLPSUWX]' ) or - S.find( pattern, '%.[^*+-]' ) or + S.find( pattern, '%%[acdlpsuwxACDLPSUWXZ]' ) or + S.find( pattern, '%.[^*+-]' ) or S.find( pattern, '%.$' ) or S.find( pattern, '()', 1, true ) ) end diff --git a/tests/engines/LuaCommon/UstringLibraryTests.lua b/tests/engines/LuaCommon/UstringLibraryTests.lua index d0a415f9..5a382265 100644 --- a/tests/engines/LuaCommon/UstringLibraryTests.lua +++ b/tests/engines/LuaCommon/UstringLibraryTests.lua @@ -511,6 +511,34 @@ return testframework.getTestProvider( { args = { str2, 'f%a+', { ['főó'] = 'Y', ['foó'] = 'Z' }, 1 }, expect = { str2, 1 } }, + { name = 'gsub: (inverted zero character class)', func = mw.ustring.gsub, + args = { "ó", '%Z', 'repl' }, + expect = { 'repl', 1 } + }, + { name = 'gsub: (single dot pattern at end)', func = mw.ustring.gsub, + args = { "ó", '.', 'repl' }, + expect = { 'repl', 1 } + }, + { name = 'gsub: (single dot pattern at end + leading)', func = mw.ustring.gsub, + args = { 'fó', 'f.', 'repl' }, + expect = { 'repl', 1 } + }, + { name = 'gsub: (dot pattern)', func = mw.ustring.gsub, + args = { 'f ó b', 'f . b', 'repl' }, + expect = { 'repl', 1 } + }, + { name = 'gsub: (dot pattern with +)', func = mw.ustring.gsub, + args = { 'f óóó b', 'f .+ b', 'repl' }, + expect = { 'repl', 1 } + }, + { name = 'gsub: (dot pattern with -)', func = mw.ustring.gsub, + args = { 'f óóó b', 'f .- b', 'repl' }, + expect = { 'repl', 1 } + }, + { name = 'gsub: (dot pattern with *)', func = mw.ustring.gsub, + args = { 'f óóó b', 'f .* b', 'repl' }, + expect = { 'repl', 1 } + }, { name = 'gsub: (function 1)', func = mw.ustring.gsub, args = { str2, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end }, expect = { '-FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo', 6 }