mediawiki-extensions-Scribunto/tests/phpunit/engines/LuaCommon/UstringLibraryTests.lua
Kunal Mehta 31820c673a Move tests into tests/phpunit and remove UnitTestsList hook
This takes advantage of extension.json's unit tests autodiscovery
mechanism.

Bug: T142120
Change-Id: Id526f3368fc73ba7e6ef1d793ea70ab05fbd9517
2017-07-07 13:50:39 -07:00

731 lines
23 KiB
Lua
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

local testframework = require 'Module:TestFramework'
local str1 = "\0\127\194\128\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191"
local str2 = "foo bar főó foó baz foooo foofoo fo"
local str3 = "??? foo bar főó foó baz foooo foofoo fo ok?"
local str4 = {}
for i = 1, 10000/4 do
str4[i] = "főó "
end
str4 = table.concat( str4 )
local function testLongGcodepoint()
local ret = {}
local i = 1
for cp in mw.ustring.gcodepoint( str4 ) do
if i <= 4 or i > 9996 then
ret[i] = cp
end
i = i + 1
end
return ret
end
return testframework.getTestProvider( {
{ name = 'isutf8: valid string', func = mw.ustring.isutf8,
args = { "\0 \127 \194\128 \223\191 \224\160\128 \239\191\191 \240\144\128\128 \244\143\191\191" },
expect = { true }
},
{ name = 'isutf8: out of range character', func = mw.ustring.isutf8,
args = { "\244\144\128\128" },
expect = { false }
},
{ name = 'isutf8: insufficient continuation bytes', func = mw.ustring.isutf8,
args = { "\240\128\128" },
expect = { false }
},
{ name = 'isutf8: excess continuation bytes', func = mw.ustring.isutf8,
args = { "\194\128\128" },
expect = { false }
},
{ name = 'isutf8: bare continuation byte', func = mw.ustring.isutf8,
args = { "\128" },
expect = { false }
},
{ name = 'isutf8: overlong encoding', func = mw.ustring.isutf8,
args = { "\192\128" },
expect = { false }
},
{ name = 'isutf8: overlong encoding (2)', func = mw.ustring.isutf8,
args = { "\193\191" },
expect = { false }
},
{ name = 'byteoffset: (1)', func = mw.ustring.byteoffset,
args = { "fóo", 1 },
expect = { 1 }
},
{ name = 'byteoffset: (2)', func = mw.ustring.byteoffset,
args = { "fóo", 2 },
expect = { 2 }
},
{ name = 'byteoffset: (3)', func = mw.ustring.byteoffset,
args = { "fóo", 3 },
expect = { 4 }
},
{ name = 'byteoffset: (4)', func = mw.ustring.byteoffset,
args = { "fóo", 4 },
expect = { nil }
},
{ name = 'byteoffset: (0,1)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 1 },
expect = { 1 }
},
{ name = 'byteoffset: (0,2)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 2 },
expect = { 2 }
},
{ name = 'byteoffset: (0,3)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 3 },
expect = { 2 }
},
{ name = 'byteoffset: (0,4)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 4 },
expect = { 4 }
},
{ name = 'byteoffset: (0,5)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 5 },
expect = { nil }
},
{ name = 'byteoffset: (0,-1)', func = mw.ustring.byteoffset,
args = { "fóo", 0, -1 },
expect = { 4 }
},
{ name = 'byteoffset: (0,-1)', func = mw.ustring.byteoffset,
args = { "foó", 0, -1 },
expect = { 3 }
},
{ name = 'byteoffset: (1,-1)', func = mw.ustring.byteoffset,
args = { "fóo", 1, -1 },
expect = { 4 }
},
{ name = 'byteoffset: (1,-1)', func = mw.ustring.byteoffset,
args = { "foó", 1, -1 },
expect = { nil }
},
{ name = 'codepoint: whole string', func = mw.ustring.codepoint,
args = { str1, 1, -1 },
expect = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff }
},
{ name = 'codepoint: substring', func = mw.ustring.codepoint,
args = { str1, 5, -2 },
expect = { 0x800, 0xffff, 0x10000 }
},
{ name = 'codepoint: (5,4)', func = mw.ustring.codepoint,
args = { str1, 5, 4 },
expect = {}
},
{ name = 'codepoint: (1,0)', func = mw.ustring.codepoint,
args = { str1, 1, 0 },
expect = {}
},
{ name = 'codepoint: (9,9)', func = mw.ustring.codepoint,
args = { str1, 9, 9 },
expect = {}
},
{ name = 'codepoint: end of a really long string', func = mw.ustring.codepoint,
args = { str4, 9000, 9004 },
expect = { 0x20, 0x66, 0x151, 0xf3, 0x20 }
},
{ name = 'char: basic test', func = mw.ustring.char,
args = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff },
expect = { str1 }
},
{ name = 'char: invalid codepoint', func = mw.ustring.char,
args = { 0x110000 },
expect = "bad argument #1 to 'char' (value out of range)"
},
{ name = 'char: invalid value', func = mw.ustring.char,
args = { 'foo' },
expect = "bad argument #1 to 'char' (number expected, got string)"
},
{ name = 'len: basic test', func = mw.ustring.len,
args = { str1 },
expect = { 8 }
},
{ name = 'len: invalid string', func = mw.ustring.len,
args = { "\244\144\128\128" },
expect = { nil }
},
{ name = 'sub: (4)', func = mw.ustring.sub,
args = { str1, 4 },
expect = { "\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191" }
},
{ name = 'sub: (4,7)', func = mw.ustring.sub,
args = { str1, 4, 7 },
expect = { "\223\191\224\160\128\239\191\191\240\144\128\128" }
},
{ name = 'sub: (4,-1)', func = mw.ustring.sub,
args = { str1, 4, -1 },
expect = { "\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191" }
},
{ name = 'sub: (4,-2)', func = mw.ustring.sub,
args = { str1, 4, -2 },
expect = { "\223\191\224\160\128\239\191\191\240\144\128\128" }
},
{ name = 'sub: (-2)', func = mw.ustring.sub,
args = { str1, -2 },
expect = { "\240\144\128\128\244\143\191\191" }
},
{ name = 'sub: (9)', func = mw.ustring.sub,
args = { str1, 9 },
expect = { "" }
},
{ name = 'sub: (0)', func = mw.ustring.sub,
args = { str1, 0 },
expect = { str1 }
},
{ name = 'sub: (4,3)', func = mw.ustring.sub,
args = { str1, 4, 3 },
expect = { "" }
},
{ name = 'sub: (1,0)', func = mw.ustring.sub,
args = { str2, 1, 0 },
expect = { "" }
},
{ name = 'sub: (5,5)', func = mw.ustring.sub,
args = { str1, 5, 5 },
expect = { "\224\160\128" }
},
{ name = 'sub: (9,9)', func = mw.ustring.sub,
args = { str1, 9, 9 },
expect = { "" }
},
{ name = 'sub: empty string', func = mw.ustring.sub,
args = { '', 5 },
expect = { "" }
},
{ name = 'upper: basic test', func = mw.ustring.upper,
args = { "fóó?" },
expect = { "FÓÓ?" }
},
{ name = 'lower: basic test', func = mw.ustring.lower,
args = { "FÓÓ?" },
expect = { "fóó?" }
},
{ name = 'find: (simple)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡foo' },
expect = { 5, 8 }
},
{ name = 'find: (%)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fo%+' },
expect = { }
},
{ name = 'find: (%)', func = mw.ustring.find,
args = { "bar ¡fo+ bar", '¡fo%+' },
expect = { 5, 8 }
},
{ name = 'find: (+)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fo+' },
expect = { 5, 8 }
},
{ name = 'find: (+) (2)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fx+o+' },
expect = {}
},
{ name = 'find: (?)', func = mw.ustring.find,
args = { "bar ¡foox bar", '¡foox?' },
expect = { 5, 9 }
},
{ name = 'find: (?) (2)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡foox?' },
expect = { 5, 8 }
},
{ name = 'find: (*)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fx*oo' },
expect = { 5, 8 }
},
{ name = 'find: (-)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fo-' },
expect = { 5, 6 }
},
{ name = 'find: (-)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fo-o' },
expect = { 5, 7 }
},
{ name = 'find: (-)', func = mw.ustring.find,
args = { "bar ¡foox bar", '¡fo-x' },
expect = { 5, 9 }
},
{ name = 'find: (%a)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡f%a' },
expect = { 5, 7 }
},
{ name = 'find: (%a, utf8)', func = mw.ustring.find,
args = { "bar ¡fóó bar", '¡f%a' },
expect = { 5, 7 }
},
{ name = 'find: (%a, utf8 2)', func = mw.ustring.find,
args = { "bar ¡fóó bar", 'f%a' },
expect = { 6, 7 }
},
{ name = 'find: (%a+)', func = mw.ustring.find,
args = { "bar ¡fóó bar", '¡f%a+' },
expect = { 5, 8 }
},
{ name = 'find: ([]+)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '¡f[oó]+' },
expect = { 5, 8 }
},
{ name = 'find: ([-]+)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '¡f[a-uá-ú]+' },
expect = { 5, 8 }
},
{ name = 'find: ([-]+ 2)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '¡f[a-ú]+' },
expect = { 5, 8 }
},
{ name = 'find: (%b)', func = mw.ustring.find,
args = { "bar ¡<foo <foo> foo> bar", '¡%b<>' },
expect = { 5, 20 }
},
{ name = 'find: (%b 2)', func = mw.ustring.find,
args = { "bar ¡(foo (foo) foo) bar", '¡%b()' },
expect = { 5, 20 }
},
{ name = 'find: (%b 3)', func = mw.ustring.find,
args = { "bar ¡-foo-foo- bar", '¡%b--' },
expect = { 5, 10 }
},
{ name = 'find: (%b 4)', func = mw.ustring.find,
args = { "bar «foo «foo» foo» bar", '%b«»' },
expect = { 5, 19 }
},
{ name = 'find: (%b 5)', func = mw.ustring.find,
args = { "bar !foo !foo¡ foo¡ bar", '%b!¡' },
expect = { 5, 19 }
},
{ name = 'find: (%b 6)', func = mw.ustring.find,
args = { "bar ¡foo ¡foo! foo! bar", '%b¡!' },
expect = { 5, 19 }
},
{ name = 'find: (%b 7)', func = mw.ustring.find,
args = { "bar ¡foo¡foo¡ bar", '%b¡¡' },
expect = { 5, 9 }
},
{ name = 'find: (%f)', func = mw.ustring.find,
args = { "foo ¡foobar ¡foo bar baz", '¡.-%f[%s]' },
expect = { 5, 11 }
},
{ name = 'find: (%f 2)', func = mw.ustring.find,
args = { "foo ¡foobar ¡foo bar baz", '¡foo%f[%s]' },
expect = { 13, 16 }
},
{ name = 'find: (%f 3)', func = mw.ustring.find,
args = { "foo foo¡foobar ¡foo bar baz", '%f[%S]¡.-%f[%s]' },
expect = { 16, 19 }
},
{ name = 'find: (%f 4)', func = mw.ustring.find,
args = { "foo foo¡foobar ¡foo bar baz", '%f[%S]¡.-%f[%s]', 16 },
expect = { 16, 19 }
},
{ name = 'find: (%f 5)', func = mw.ustring.find,
args = { "foo ¡bar baz", '%f[%Z]' },
expect = { 1, 0 }
},
{ name = 'find: (%f 6)', func = mw.ustring.find,
args = { "foo ¡bar baz", '%f[%z]' },
expect = { 13, 12 }
},
{ name = 'find: (%f 7)', func = mw.ustring.find,
args = { "foo ¡b\0r baz", '%f[%Z]', 2 },
expect = { 8, 7 }
},
{ name = 'find: (%f 8)', func = mw.ustring.find,
args = { "\0foo ¡b\0r baz", '%f[%z]' },
expect = { 8, 7 }
},
{ name = 'find: (%f 9)', func = mw.ustring.find,
args = { "\0foo ¡b\0r baz", '%f[%Z]' },
expect = { 2, 1 }
},
{ name = 'find: (%A)', func = mw.ustring.find,
args = { "fóó? bar", '%A+' },
expect = { 4, 5 }
},
{ name = 'find: (%W)', func = mw.ustring.find,
args = { "fóó? bar", '%W+' },
expect = { 4, 5 }
},
{ name = 'find: ([^])', func = mw.ustring.find,
args = { "fóó? bar", '[^a-zó]+' },
expect = { 4, 5 }
},
{ name = 'find: ([^] 2)', func = mw.ustring.find,
args = { "fó0? bar", '[^%a0-9]+' },
expect = { 4, 5 }
},
{ name = 'find: ([^] 3)', func = mw.ustring.find,
args = { "¡fó0% bar", '¡[^%%]+' },
expect = { 1, 4 }
},
{ name = 'find: ($)', func = mw.ustring.find,
args = { "¡foo1 ¡foo2 ¡foo3", '¡foo[0-9]+$' },
expect = { 13, 17 }
},
{ name = 'find: (.*)', func = mw.ustring.find,
args = { "¡foo¡ ¡bar¡ baz", '¡.*¡' },
expect = { 1, 11 }
},
{ name = 'find: (.-)', func = mw.ustring.find,
args = { "¡foo¡ ¡bar¡ baz", '¡.-¡' },
expect = { 1, 5 }
},
{ name = 'find: plain', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '¡.¡', 1, true },
expect = { 5, 7 }
},
{ name = 'find: empty delimiter', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '' },
expect = { 1, 0 }
},
{ name = 'find: empty delimiter (2)', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', 2 },
expect = { 2, 1 }
},
{ name = 'find: plain + empty delimiter', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', 1, true },
expect = { 1, 0 }
},
{ name = 'find: plain + empty delimiter (2)', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', 2, true },
expect = { 2, 1 }
},
{ name = 'find: excessive init', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '()', 20 },
expect = { 8, 7, 8 }
},
{ name = 'find: excessive init (2)', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '()', -20 },
expect = { 1, 0, 1 }
},
{ name = 'find: plain + excessive init', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', 20, true },
expect = { 8, 7 }
},
{ name = 'find: plain + excessive init', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', -20, true },
expect = { 1, 0 }
},
{ name = 'find: capture (1)', func = mw.ustring.find,
args = { "bar ¡foo bar", '(¡foo)' },
expect = { 5, 8, '¡foo' }
},
{ name = 'find: capture (2)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '(¡f%a+)' },
expect = { 5, 8, '¡fóo' }
},
{ name = 'find: capture (3)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '(¡f(%a)%a)' },
expect = { 5, 8, '¡fóo', 'ó' }
},
{ name = 'find: capture (4)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '(¡f(%a-)%a)' },
expect = { 5, 7, '¡fó', '' }
},
{ name = 'find: capture (5)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '()(()¡f()(%a)()%a())()' },
expect = { 5, 8, 5, '¡fóo', 5, 7, 'ó', 8, 9, 9 }
},
{ name = 'find: capture (6)', func = mw.ustring.find,
args = { "fóó", "()(f)()(óó)()" },
expect = { 1, 3, 1, 'f', 2, 'óó', 4 }
},
{ name = 'find: capture (7)', func = mw.ustring.find,
args = { "fóó fóó", "()(f)()(óó)()", 2 },
expect = { 5, 7, 5, 'f', 6, 'óó', 8 }
},
{ name = 'find: (%1)', func = mw.ustring.find,
args = { "foo foofóó foófoó bar", '(f%a+)%1' },
expect = { 12, 17, 'foó' }
},
{ name = 'find: deceptively-simple pattern', func = mw.ustring.find,
args = { "fóó", '([^a-z])' },
expect = { 2, 2, 'ó' }
},
{ name = 'find: Bracket at start of a character set doesn\'t close', func = mw.ustring.find,
args = { "fóó", '()[]' },
expect = "Missing close-bracket for character set beginning at pattern character 3"
},
{ name = 'find: Bracket at start of a negated character set doesn\'t close', func = mw.ustring.find,
args = { "fóó", '()[^]' },
expect = "Missing close-bracket for character set beginning at pattern character 3"
},
{ name = 'find: Bracket at start of a character set is literal', func = mw.ustring.find,
args = { "foo]bar¿", '()([]])' },
expect = { 4, 4, 4, ']' }
},
{ name = 'find: Bracket at start of a negated character set is literal', func = mw.ustring.find,
args = { "]bar¿", '()([^]])' },
expect = { 2, 2, 2, 'b' }
},
{ name = 'find: Bracket at start of a character set can be a range endpoint', func = mw.ustring.find,
args = { "foo]bar¿", '()([]-z]+)' },
expect = { 1, 7, 1, 'foo]bar' }
},
{ name = 'find: Bracket at start of a negated character can be a range endpoint', func = mw.ustring.find,
args = { "fOO]bar¿", '()([^]-z]+)' },
expect = { 2, 3, 2, 'OO' }
},
{ name = 'find: Weird edge-case that was failing (1)', func = mw.ustring.find,
args = { "foo]ba-]r¿", '()([a]-%]+)' },
expect = { 4, 4, 4, ']' }
},
{ name = 'find: Weird edge-case that was failing (2)', func = mw.ustring.find,
args = { "foo¿", '()[!-%]' },
expect = "Missing close-bracket for character set beginning at pattern character 3"
},
{ name = 'find: Inverted range (1)', func = mw.ustring.find,
args = { "foo¿", '()([z-a]+)' },
expect = { nil }
},
{ name = 'find: Inverted range (2)', func = mw.ustring.find,
args = { "foo¿", '()([^z-a]+)' },
expect = { 1, 4, 1, 'foo¿' }
},
{ name = 'find: Inverted range (3)', func = mw.ustring.find,
args = { "foo¿", '()(f[z-a]o)' },
expect = { nil }
},
{ name = 'find: Inverted range (4)', func = mw.ustring.find,
args = { "foo¿", '()(f[z-a]*o)' },
expect = { 1, 2, 1, 'fo' }
},
{ name = 'match: (1)', func = mw.ustring.match,
args = { "bar fóo bar", 'f%a+' },
expect = { 'fóo' }
},
{ name = 'match: (2)', func = mw.ustring.match,
args = { "bar fóo bar", 'f(%a+)' },
expect = { 'óo' }
},
{ name = 'match: empty pattern', func = mw.ustring.match,
args = { "¡a¡ ¡.¡", '()' },
expect = { 1 }
},
{ name = 'match: empty pattern (2)', func = mw.ustring.match,
args = { "¡a¡ ¡.¡", '()', 2 },
expect = { 2 }
},
{ name = 'match: excessive init', func = mw.ustring.match,
args = { "¡a¡ ¡.¡", '()', 20 },
expect = { 8 }
},
{ name = 'match: excessive init (2)', func = mw.ustring.match,
args = { "¡a¡ ¡.¡", '()', -20 },
expect = { 1 }
},
{ name = 'gsub: (emtpy string, empty pattern)', func = mw.ustring.gsub,
args = { '', '', 'X' },
expect = { 'X', 1 }
},
{ name = 'gsub: (emtpy string, one char pattern)', func = mw.ustring.gsub,
args = { '', 'á', 'X' },
expect = { '', 0 }
},
{ name = 'gsub: (one char string, one char pattern)', func = mw.ustring.gsub,
args = { 'á', 'á', 'X' },
expect = { 'X', 1 }
},
{ name = 'gsub: (one char string, empty pattern)', func = mw.ustring.gsub,
args = { 'á', '', 'X' },
expect = { 'XáX', 2 }
},
{ name = 'gsub: (empty pattern with position captures)', func = mw.ustring.gsub,
args = { 'ábć', '()', '%1' },
expect = { '1á2b3ć4', 4 }
},
{ name = 'gsub: (limited to 1 replacement)', func = mw.ustring.gsub,
args = { 'áá', 'á', 'X', 1 },
expect = { '', 1 }
},
{ name = 'gsub: (limited to 0 replacements)', func = mw.ustring.gsub,
args = { 'áá', 'á', 'X', 0 },
expect = { 'áá', 0 }
},
{ name = 'gsub: (string 1)', func = mw.ustring.gsub,
args = { str2, 'f%a+', 'X' },
expect = { 'X bar X X baz X X X', 6 }
},
{ name = 'gsub: (string 2)', func = mw.ustring.gsub,
args = { str3, 'f%a+', 'X' },
expect = { '??? X bar X X baz X X X ok?', 6 }
},
{ name = 'gsub: (string 3)', func = mw.ustring.gsub,
args = { str2, 'f%a+', 'X', 3 },
expect = { 'X bar X X baz foooo foofoo fo', 3 }
},
{ name = 'gsub: (string 4)', func = mw.ustring.gsub,
args = { str3, 'f%a+', 'X', 3 },
expect = { '??? X bar X X baz foooo foofoo fo ok?', 3 }
},
{ name = 'gsub: (string 5)', func = mw.ustring.gsub,
args = { 'foo; fóó', '(f)(%a+)', '%%0=%0 %%1=%1 %%2=%2' },
expect = { '%0=foo %1=f %2=oo; %0=fóó %1=f %2=óó', 2 }
},
{ name = 'gsub: (anchored)', func = mw.ustring.gsub,
args = { 'foofoofoo foo', '^foo', 'X' },
expect = { 'Xfoofoo foo', 1 }
},
{ name = 'gsub: (table 1)', func = mw.ustring.gsub,
args = { str2, 'f%a+', { foo = 'X', ['főó'] = 'Y', ['foó'] = 'Z' } },
expect = { 'X bar Y Z baz foooo foofoo fo', 6 }
},
{ name = 'gsub: (table 2)', func = mw.ustring.gsub,
args = { str3, 'f%a+', { foo = 'X', ['főó'] = 'Y', ['foó'] = 'Z' } },
expect = { '??? X bar Y Z baz foooo foofoo fo ok?', 6 }
},
{ name = 'gsub: (table 3)', func = mw.ustring.gsub,
args = { str2, 'f%a+', { ['főó'] = 'Y', ['foó'] = 'Z' }, 1 },
expect = { str2, 1 }
},
{ name = 'gsub: (inverted zero character class)', func = mw.ustring.gsub,
args = { "ó", '%Z', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (single dot pattern at end)', func = mw.ustring.gsub,
args = { "ó", '.', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (single dot pattern at end + leading)', func = mw.ustring.gsub,
args = { '', 'f.', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern)', func = mw.ustring.gsub,
args = { 'f ó b', 'f . b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with +)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .+ b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with -)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .- b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with *)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .* b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (function 1)', func = mw.ustring.gsub,
args = { str2, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end },
expect = { '-FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo', 6 }
},
{ name = 'gsub: (function 2)', func = mw.ustring.gsub,
args = { str3, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end },
expect = { '??? -FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo ok?', 6 }
},
{ name = 'gsub: invalid replacement string', func = mw.ustring.gsub,
args = { 'foo; fóó', '(%a+)', '%2' },
expect = "invalid capture index %2 in replacement string"
},
{ name = 'gsub: passing numbers instead of strings (1)', func = mw.ustring.gsub,
args = { 12345, '[3]', '9' },
expect = { '12945', 1 }
},
{ name = 'gsub: passing numbers instead of strings (2)', func = mw.ustring.gsub,
args = { '12345', 3, '9' },
expect = { '12945', 1 }
},
{ name = 'gsub: passing numbers instead of strings (3)', func = mw.ustring.gsub,
args = { '12345', '[3]', 9 },
expect = { '12945', 1 }
},
{ name = 'gcodepoint: basic test', func = mw.ustring.gcodepoint,
args = { str1 },
expect = { { 0 }, { 0x7f }, { 0x80 }, { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 }, { 0x10ffff } },
type = 'Iterator'
},
{ name = 'gcodepoint: (4)', func = mw.ustring.gcodepoint,
args = { str1, 4 },
expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 }, { 0x10ffff } },
type = 'Iterator'
},
{ name = 'gcodepoint: (4, -2)', func = mw.ustring.gcodepoint,
args = { str1, 4, -2 },
expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 } },
type = 'Iterator'
},
{ name = 'gcodepoint: (4, 3)', func = mw.ustring.gcodepoint,
args = { str1, 4, 3 },
expect = {},
type = 'Iterator'
},
{ name = 'gcodepoint: (1, 0)', func = mw.ustring.gcodepoint,
args = { str1, 1, 0 },
expect = {},
type = 'Iterator'
},
{ name = 'gcodepoint: (9, 9)', func = mw.ustring.gcodepoint,
args = { str1, 9, 9 },
expect = {},
type = 'Iterator'
},
{ name = 'gcodepoint: really long string', func = testLongGcodepoint,
args = {},
expect = { {
[1] = 0x66, [2] = 0x151, [3] = 0xf3, [4] = 0x20,
[9997] = 0x66, [9998] = 0x151, [9999] = 0xf3, [10000] = 0x20,
} },
},
{ name = 'gmatch: test string 1', func = mw.ustring.gmatch,
args = { str2, 'f%a+' },
expect = { { 'foo' }, { 'főó' }, { 'foó' }, { 'foooo' }, { 'foofoo' }, { 'fo' } },
type = 'Iterator'
},
{ name = 'gmatch: test string 2', func = mw.ustring.gmatch,
args = { str3, 'f%a+' },
expect = { { 'foo' }, { 'főó' }, { 'foó' }, { 'foooo' }, { 'foofoo' }, { 'fo' } },
type = 'Iterator'
},
{ name = 'gmatch: anchored', func = mw.ustring.gmatch,
args = { "fóó1 ^fóó2 fóó3 ^fóó4", '^fóó%d+' },
expect = { { "^fóó2" }, { "^fóó4" } },
type = 'Iterator'
},
{ name = 'find: Pure-lua version, non-native error message', func = mw.ustring.find,
args = { "fóó", '[]' },
expect = "Missing close-bracket for character set beginning at pattern character 1"
},
{ name = 'match: Pure-lua version, non-native error message', func = mw.ustring.match,
args = { "fóó", '[]' },
expect = "Missing close-bracket for character set beginning at pattern character 1"
},
{ name = 'gsub: Pure-lua version, non-native error message', func = mw.ustring.gsub,
args = { "fóó", '[]', '' },
expect = "Missing close-bracket for character set beginning at pattern character 1"
},
{ name = 'string length limit',
func = function ()
local s = string.rep( "x", mw.ustring.maxStringLength + 1 )
local ret = { mw.ustring.gsub( s, 'a', 'b' ) }
-- So the output isn't insanely long
ret[1] = string.gsub( ret[1], 'xxxxx(x*)', function ( m )
return 'xxxxx[snip ' .. #m .. ' more]'
end )
return unpack( ret )
end,
expect = "bad argument #1 to 'gsub' (string is longer than " .. mw.ustring.maxStringLength .. " bytes)"
},
{ name = 'pattern length limit',
func = function ()
local pattern = string.rep( "x", mw.ustring.maxPatternLength + 1 )
return mw.ustring.gsub( 'a', pattern, 'b' )
end,
expect = "bad argument #2 to 'gsub' (pattern is longer than " .. mw.ustring.maxPatternLength .. " bytes)"
},
} )