mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Scribunto
synced 2024-11-27 17:50:06 +00:00
Fix deceptively-simple pattern in pure-Lua ustring
The pure-Lua ustring pattern-matching functions short-circuit to the much faster string library when the pattern would produce the same match if applied to the raw bytes. A pattern like "[^a-z]" can match part of a multi-byte UTF-8 character when applied bytewise, and so must be detected as unsafe. Let's also test the pure-Lua module directly, instead of me having to comment out lines in Scribunto_LuaUstringLibrary::register() whenever I want to test it. Change-Id: I91ed3374aadfea379b9db2e13b4248ab20df509e
This commit is contained in:
parent
40985a1672
commit
0367e9bddd
|
@ -735,6 +735,7 @@ end
|
|||
-- * If it contains any bytes over 0x7f. We could skip these if they're not
|
||||
-- inside brackets and aren't followed by quantifiers and aren't part of a
|
||||
-- '%b', but that's too complicated to check.
|
||||
-- * If it contains a negated character set.
|
||||
-- * If it contains "%a" or any of the other %-prefixed character sets except
|
||||
-- %z or %Z.
|
||||
-- * If it contains a '.' not followed by '*', '+', or '-'. A bare '.' or '.?'
|
||||
|
@ -747,6 +748,7 @@ end
|
|||
local function patternIsSimple( pattern )
|
||||
return not (
|
||||
S.find( pattern, '[\128-\255]' ) or
|
||||
S.find( pattern, '%[%^' ) or
|
||||
S.find( pattern, '%%[acdlpsuwxACDLPSUWX]' ) or
|
||||
S.find( pattern, '%.[^*+-]' ) or
|
||||
S.find( pattern, '()', 1, true )
|
||||
|
|
20
tests/engines/LuaCommon/UstringLibraryPureLuaTest.php
Normal file
20
tests/engines/LuaCommon/UstringLibraryPureLuaTest.php
Normal file
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
|
||||
require_once( __DIR__ . '/UstringLibraryTest.php' );
|
||||
|
||||
/**
 * Re-runs the test cases inherited from Scribunto_LuaUstringLibraryTests
 * with mw.ustring replaced by the pure-Lua "ustring" module.
 */
class Scribunto_LuaUstringLibraryPureLuaTests extends Scribunto_LuaUstringLibraryTests {
	/**
	 * Performs the parent's setup, then replaces mw.ustring inside the
	 * interpreter with the module returned by require( "ustring" ).
	 *
	 * No visibility keyword is given, so this method is implicitly public.
	 */
	function setUp() {
		parent::setUp();

		$lua = $this->getEngine()->getInterpreter();

		// Compile the override chunk first. The length limits are copied from
		// the existing mw.ustring before it is replaced, so the substituted
		// module keeps the same maxStringLength / maxPatternLength values.
		// NOTE(review): 'fortest' is presumably the chunk name - confirm
		// against the interpreter's loadString() signature.
		$overrideChunk = $lua->loadString( '
			local ustring = require( "ustring" )
			ustring.maxStringLength = mw.ustring.maxStringLength
			ustring.maxPatternLength = mw.ustring.maxPatternLength
			mw.ustring = ustring
		', 'fortest' );

		// Run the compiled chunk so the override takes effect.
		$lua->callFunction( $overrideChunk );
	}
}
|
|
@ -425,6 +425,10 @@ return testframework.getTestProvider( {
|
|||
args = { "foo foofóó foófoó bar", '(f%a+)%1' },
|
||||
expect = { 12, 17, 'foó' }
|
||||
},
|
||||
{ name = 'find: deceptively-simple pattern', func = mw.ustring.find,
|
||||
args = { "fóó", '([^a-z])' },
|
||||
expect = { 2, 2, 'ó' }
|
||||
},
|
||||
|
||||
{ name = 'match: (1)', func = mw.ustring.match,
|
||||
args = { "bar fóo bar", 'f%a+' },
|
||||
|
|
Loading…
Reference in a new issue