Fix a problem with simple pattern detection

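A pattern containing a '.' is only "simple" if every '.' is
followed by +, - or *. A '.' at the very end of the pattern was not
detected, so such patterns were wrongly treated as simple.

The %Z character class is likewise no longer treated as simple.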

Change-Id: Ia10b9164caeabe464c76441cc82eef37a7013048
This commit is contained in:
Jan Berkel 2015-10-06 10:25:30 +01:00 committed by Brad Jorsch
parent dfb59d508d
commit fb20934b16
2 changed files with 34 additions and 7 deletions

View file

@ -740,11 +740,10 @@ end
-- inside brackets and aren't followed by quantifiers and aren't part of a -- inside brackets and aren't followed by quantifiers and aren't part of a
-- '%b', but that's too complicated to check. -- '%b', but that's too complicated to check.
-- * If it contains a negated character set. -- * If it contains a negated character set.
-- * If it contains "%a" or any of the other %-prefixed character sets except -- * If it contains "%a" or any of the other %-prefixed character sets except %z.
-- %z or %Z. -- * If it contains a '.' not followed by '*', '+', or '-'. A bare '.' or '.?'
-- * If it contains a '.' not followed by '*', '+', or '-'. A bare '.' or '.?' -- matches a partial UTF-8 character, but the others will happily enough
-- would try to match a partial UTF-8 character, but the others will happily -- match a whole UTF-8 character thinking it's 2, 3 or 4.
-- enough match a whole character thinking it's 2 or 4.
-- * If it contains position-captures. -- * If it contains position-captures.
-- --
-- @param string pattern -- @param string pattern
@ -753,8 +752,8 @@ local function patternIsSimple( pattern )
return not ( return not (
S.find( pattern, '[\128-\255]' ) or S.find( pattern, '[\128-\255]' ) or
S.find( pattern, '%[%^' ) or S.find( pattern, '%[%^' ) or
S.find( pattern, '%%[acdlpsuwxACDLPSUWX]' ) or S.find( pattern, '%%[acdlpsuwxACDLPSUWXZ]' ) or
S.find( pattern, '%.[^*+-]' ) or S.find( pattern, '%.[^*+-]' ) or S.find( pattern, '%.$' ) or
S.find( pattern, '()', 1, true ) S.find( pattern, '()', 1, true )
) )
end end

View file

@ -511,6 +511,34 @@ return testframework.getTestProvider( {
args = { str2, 'f%a+', { ['főó'] = 'Y', ['foó'] = 'Z' }, 1 }, args = { str2, 'f%a+', { ['főó'] = 'Y', ['foó'] = 'Z' }, 1 },
expect = { str2, 1 } expect = { str2, 1 }
}, },
{ name = 'gsub: (inverted zero character class)', func = mw.ustring.gsub,
args = { "ó", '%Z', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (single dot pattern at end)', func = mw.ustring.gsub,
args = { "ó", '.', 'repl' },
expect = { 'repl', 1 }
},
-- A '.' at the end of the pattern, preceded by a literal: the subject must be
-- 'f' followed by one non-ASCII character so that 'f.' matches the whole
-- string exactly once (an empty subject could never yield one replacement).
{ name = 'gsub: (single dot pattern at end + leading)', func = mw.ustring.gsub,
args = { 'fó', 'f.', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern)', func = mw.ustring.gsub,
args = { 'f ó b', 'f . b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with +)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .+ b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with -)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .- b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with *)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .* b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (function 1)', func = mw.ustring.gsub, { name = 'gsub: (function 1)', func = mw.ustring.gsub,
args = { str2, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end }, args = { str2, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end },
expect = { '-FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo', 6 } expect = { '-FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo', 6 }