diff --git a/engines/LuaCommon/UstringLibrary.php b/engines/LuaCommon/UstringLibrary.php index f9aa26a8..d6661b3f 100644 --- a/engines/LuaCommon/UstringLibrary.php +++ b/engines/LuaCommon/UstringLibrary.php @@ -232,7 +232,7 @@ class Scribunto_LuaUstringLibrary extends Scribunto_LuaLibraryBase { } /* Convert a Lua pattern into a PCRE regex */ - private function patternToRegex( $pattern ) { + private function patternToRegex( $pattern, $noAnchor = false ) { $pat = preg_split( '//us', $pattern, null, PREG_SPLIT_NO_EMPTY ); static $charsets = null, $brcharsets = null; @@ -295,7 +295,7 @@ class Scribunto_LuaUstringLibrary extends Scribunto_LuaLibraryBase { switch ( $pat[$i] ) { case '^': $q = $i; - $re .= $q ? '\\^' : '^'; + $re .= ( $noAnchor || $q ) ? '\\^' : '^'; break; case '$': @@ -497,7 +497,7 @@ class Scribunto_LuaUstringLibrary extends Scribunto_LuaLibraryBase { $this->checkString( 'gmatch', $s ); $this->checkPattern( 'gmatch', $pattern ); - list( $re, $capt ) = $this->patternToRegex( $pattern ); + list( $re, $capt ) = $this->patternToRegex( $pattern, true ); return array( $re, $capt ); } diff --git a/engines/LuaCommon/lualib/mw.ustring.lua b/engines/LuaCommon/lualib/mw.ustring.lua index b886124d..d4375a45 100644 --- a/engines/LuaCommon/lualib/mw.ustring.lua +++ b/engines/LuaCommon/lualib/mw.ustring.lua @@ -14,10 +14,6 @@ local function php_gmatch( s, pattern ) checkType( 'gmatch', 1, s, 'string' ) checkType( 'gmatch', 2, pattern, 'string' ) - if string.sub( pattern, 1, 1 ) == '^' then - return function() return nil end, nil, nil - end - local re, capt = gmatch_init( s, pattern ) local pos = 0 return function() diff --git a/engines/LuaCommon/lualib/ustring/ustring.lua b/engines/LuaCommon/lualib/ustring/ustring.lua index 18c340b4..2c95db4f 100644 --- a/engines/LuaCommon/lualib/ustring/ustring.lua +++ b/engines/LuaCommon/lualib/ustring/ustring.lua @@ -397,10 +397,11 @@ setmetatable( charset_cache, { __weak = 'kv' } ) -- @param rawpat string Pattern -- @param pattern table Exploded pattern -- @param init int Starting index +-- @param noAnchor boolean True to ignore '^' -- @return int starting index of the match -- @return int ending index of the match -- @return string|int* captures -local function find( s, cps, rawpat, pattern, init ) +local function find( s, cps, rawpat, pattern, init, noAnchor ) local charsets = require 'ustring/charsets' local anchor = false local ncapt, captures @@ -676,7 +677,7 @@ local function find( s, cps, rawpat, pattern, init ) -- match. local sp = init local pp = 1 - if pattern.codepoints[1] == 0x5e then -- '^': Pattern is anchored + if not noAnchor and pattern.codepoints[1] == 0x5e then -- '^': Pattern is anchored anchor = true pp = 2 end @@ -838,15 +839,8 @@ function ustring.gmatch( s, pattern ) end local init = 1 - if pat.codepoints[1] == 0x5e then -- '^': Pattern is anchored - -- Lua special-cases this to never match - return function () - return nil - end - end - return function () - local m = { find( s, cps, pattern, pat, init ) } + local m = { find( s, cps, pattern, pat, init, true ) } if not m[1] then return nil end diff --git a/tests/engines/LuaCommon/UstringLibraryTests.lua b/tests/engines/LuaCommon/UstringLibraryTests.lua index 930a9a62..7a6e2015 100644 --- a/tests/engines/LuaCommon/UstringLibraryTests.lua +++ b/tests/engines/LuaCommon/UstringLibraryTests.lua @@ -396,8 +396,8 @@ return testframework.getTestProvider( { type = 'Iterator' }, { name = 'gmatch: anchored', func = mw.ustring.gmatch, - args = { str2, '^f%a+' }, - expect = {}, + args = { "fóó1 ^fóó2 fóó3 ^fóó4", '^fóó%d+' }, + expect = { { "^fóó2" }, { "^fóó4" } }, type = 'Iterator' },