-- Mirror of https://gerrit.wikimedia.org/r/mediawiki/extensions/Scribunto
-- (synced 2024-12-03 20:36:18 +00:00); 731 lines, 23 KiB, Lua.
local testframework = require 'Module:TestFramework'
|
||
|
||
-- Shared string fixtures referenced by the test cases in this file.

-- Eight code points written as raw UTF-8 bytes, two for each encoded length
-- (1, 2, 3 and 4 bytes): U+0000, U+007F, U+0080, U+07FF, U+0800, U+FFFF,
-- U+10000, U+10FFFF.
local str1 = "\0\127\194\128\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191"

-- Space-separated words mixing ASCII and non-ASCII letters.
local str2 = "foo bar főó foó baz foooo foofoo fo"

-- The same words as str2, wrapped in "??? " and " ok?".
local str3 = "??? foo bar főó foó baz foooo foofoo fo ok?"

-- A long string: the 4-code-point unit "főó " repeated 10000/4 times,
-- i.e. 10000 code points in total.
local str4 = string.rep( "főó ", 10000 / 4 )
|
||
|
||
-- Iterates over str4 with mw.ustring.gcodepoint and collects a sample of the
-- values it yields.
--
-- @return table Sparse table keyed by the 1-based iteration count; it holds
--   the values yielded at positions 1..4 and at every position after 9996.
local function testLongGcodepoint()
	local ret = {}
	local i = 1
	for cp in mw.ustring.gcodepoint( str4 ) do
		-- Keep only the first four and the trailing positions; everything in
		-- between is counted but not stored.
		if i <= 4 or i > 9996 then
			ret[i] = cp
		end
		i = i + 1
	end
	return ret
end
|
||
|
||
-- Test cases for the mw.ustring library, handed to the test framework's
-- getTestProvider.
--
-- Each entry carries:
--   name   - label for the case
--   func   - function under test
--   args   - list of arguments passed to func (absent when func is a closure
--            that takes none)
--   expect - list of expected return values; where it is a string it appears
--            to be the expected error message (confirm against
--            Module:TestFramework)
--   type   - optional; 'Iterator' entries list one table of values per
--            iteration step
return testframework.getTestProvider( {
	-- isutf8
	{ name = 'isutf8: valid string', func = mw.ustring.isutf8,
	  args = { "\0 \127 \194\128 \223\191 \224\160\128 \239\191\191 \240\144\128\128 \244\143\191\191" },
	  expect = { true }
	},
	{ name = 'isutf8: out of range character', func = mw.ustring.isutf8,
	  args = { "\244\144\128\128" },
	  expect = { false }
	},
	{ name = 'isutf8: insufficient continuation bytes', func = mw.ustring.isutf8,
	  args = { "\240\128\128" },
	  expect = { false }
	},
	{ name = 'isutf8: excess continuation bytes', func = mw.ustring.isutf8,
	  args = { "\194\128\128" },
	  expect = { false }
	},
	{ name = 'isutf8: bare continuation byte', func = mw.ustring.isutf8,
	  args = { "\128" },
	  expect = { false }
	},
	{ name = 'isutf8: overlong encoding', func = mw.ustring.isutf8,
	  args = { "\192\128" },
	  expect = { false }
	},
	{ name = 'isutf8: overlong encoding (2)', func = mw.ustring.isutf8,
	  args = { "\193\191" },
	  expect = { false }
	},

	-- byteoffset
	{ name = 'byteoffset: (1)', func = mw.ustring.byteoffset,
	  args = { "fóo", 1 },
	  expect = { 1 }
	},
	{ name = 'byteoffset: (2)', func = mw.ustring.byteoffset,
	  args = { "fóo", 2 },
	  expect = { 2 }
	},
	{ name = 'byteoffset: (3)', func = mw.ustring.byteoffset,
	  args = { "fóo", 3 },
	  expect = { 4 }
	},
	{ name = 'byteoffset: (4)', func = mw.ustring.byteoffset,
	  args = { "fóo", 4 },
	  expect = { nil }
	},
	{ name = 'byteoffset: (0,1)', func = mw.ustring.byteoffset,
	  args = { "fóo", 0, 1 },
	  expect = { 1 }
	},
	{ name = 'byteoffset: (0,2)', func = mw.ustring.byteoffset,
	  args = { "fóo", 0, 2 },
	  expect = { 2 }
	},
	{ name = 'byteoffset: (0,3)', func = mw.ustring.byteoffset,
	  args = { "fóo", 0, 3 },
	  expect = { 2 }
	},
	{ name = 'byteoffset: (0,4)', func = mw.ustring.byteoffset,
	  args = { "fóo", 0, 4 },
	  expect = { 4 }
	},
	{ name = 'byteoffset: (0,5)', func = mw.ustring.byteoffset,
	  args = { "fóo", 0, 5 },
	  expect = { nil }
	},
	{ name = 'byteoffset: (0,-1)', func = mw.ustring.byteoffset,
	  args = { "fóo", 0, -1 },
	  expect = { 4 }
	},
	{ name = 'byteoffset: (0,-1) (2)', func = mw.ustring.byteoffset,
	  args = { "foó", 0, -1 },
	  expect = { 3 }
	},
	{ name = 'byteoffset: (1,-1)', func = mw.ustring.byteoffset,
	  args = { "fóo", 1, -1 },
	  expect = { 4 }
	},
	{ name = 'byteoffset: (1,-1) (2)', func = mw.ustring.byteoffset,
	  args = { "foó", 1, -1 },
	  expect = { nil }
	},

	-- codepoint
	{ name = 'codepoint: whole string', func = mw.ustring.codepoint,
	  args = { str1, 1, -1 },
	  expect = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff }
	},
	{ name = 'codepoint: substring', func = mw.ustring.codepoint,
	  args = { str1, 5, -2 },
	  expect = { 0x800, 0xffff, 0x10000 }
	},
	{ name = 'codepoint: (5,4)', func = mw.ustring.codepoint,
	  args = { str1, 5, 4 },
	  expect = {}
	},
	{ name = 'codepoint: (1,0)', func = mw.ustring.codepoint,
	  args = { str1, 1, 0 },
	  expect = {}
	},
	{ name = 'codepoint: (9,9)', func = mw.ustring.codepoint,
	  args = { str1, 9, 9 },
	  expect = {}
	},
	{ name = 'codepoint: end of a really long string', func = mw.ustring.codepoint,
	  args = { str4, 9000, 9004 },
	  expect = { 0x20, 0x66, 0x151, 0xf3, 0x20 }
	},

	-- char
	{ name = 'char: basic test', func = mw.ustring.char,
	  args = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff },
	  expect = { str1 }
	},
	{ name = 'char: invalid codepoint', func = mw.ustring.char,
	  args = { 0x110000 },
	  expect = "bad argument #1 to 'char' (value out of range)"
	},
	{ name = 'char: invalid value', func = mw.ustring.char,
	  args = { 'foo' },
	  expect = "bad argument #1 to 'char' (number expected, got string)"
	},

	-- len
	{ name = 'len: basic test', func = mw.ustring.len,
	  args = { str1 },
	  expect = { 8 }
	},
	{ name = 'len: invalid string', func = mw.ustring.len,
	  args = { "\244\144\128\128" },
	  expect = { nil }
	},

	-- sub
	{ name = 'sub: (4)', func = mw.ustring.sub,
	  args = { str1, 4 },
	  expect = { "\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191" }
	},
	{ name = 'sub: (4,7)', func = mw.ustring.sub,
	  args = { str1, 4, 7 },
	  expect = { "\223\191\224\160\128\239\191\191\240\144\128\128" }
	},
	{ name = 'sub: (4,-1)', func = mw.ustring.sub,
	  args = { str1, 4, -1 },
	  expect = { "\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191" }
	},
	{ name = 'sub: (4,-2)', func = mw.ustring.sub,
	  args = { str1, 4, -2 },
	  expect = { "\223\191\224\160\128\239\191\191\240\144\128\128" }
	},
	{ name = 'sub: (-2)', func = mw.ustring.sub,
	  args = { str1, -2 },
	  expect = { "\240\144\128\128\244\143\191\191" }
	},
	{ name = 'sub: (9)', func = mw.ustring.sub,
	  args = { str1, 9 },
	  expect = { "" }
	},
	{ name = 'sub: (0)', func = mw.ustring.sub,
	  args = { str1, 0 },
	  expect = { str1 }
	},
	{ name = 'sub: (4,3)', func = mw.ustring.sub,
	  args = { str1, 4, 3 },
	  expect = { "" }
	},
	{ name = 'sub: (1,0)', func = mw.ustring.sub,
	  args = { str2, 1, 0 },
	  expect = { "" }
	},
	{ name = 'sub: (5,5)', func = mw.ustring.sub,
	  args = { str1, 5, 5 },
	  expect = { "\224\160\128" }
	},
	{ name = 'sub: (9,9)', func = mw.ustring.sub,
	  args = { str1, 9, 9 },
	  expect = { "" }
	},
	{ name = 'sub: empty string', func = mw.ustring.sub,
	  args = { '', 5 },
	  expect = { "" }
	},

	-- upper / lower
	{ name = 'upper: basic test', func = mw.ustring.upper,
	  args = { "fóó?" },
	  expect = { "FÓÓ?" }
	},
	{ name = 'lower: basic test', func = mw.ustring.lower,
	  args = { "FÓÓ?" },
	  expect = { "fóó?" }
	},

	-- find: pattern items
	{ name = 'find: (simple)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡foo' },
	  expect = { 5, 8 }
	},
	{ name = 'find: (%)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡fo%+' },
	  expect = { }
	},
	{ name = 'find: (%) (2)', func = mw.ustring.find,
	  args = { "bar ¡fo+ bar", '¡fo%+' },
	  expect = { 5, 8 }
	},
	{ name = 'find: (+)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡fo+' },
	  expect = { 5, 8 }
	},
	{ name = 'find: (+) (2)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡fx+o+' },
	  expect = {}
	},
	{ name = 'find: (?)', func = mw.ustring.find,
	  args = { "bar ¡foox bar", '¡foox?' },
	  expect = { 5, 9 }
	},
	{ name = 'find: (?) (2)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡foox?' },
	  expect = { 5, 8 }
	},
	{ name = 'find: (*)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡fx*oo' },
	  expect = { 5, 8 }
	},
	{ name = 'find: (-)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡fo-' },
	  expect = { 5, 6 }
	},
	{ name = 'find: (-) (2)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡fo-o' },
	  expect = { 5, 7 }
	},
	{ name = 'find: (-) (3)', func = mw.ustring.find,
	  args = { "bar ¡foox bar", '¡fo-x' },
	  expect = { 5, 9 }
	},
	{ name = 'find: (%a)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '¡f%a' },
	  expect = { 5, 7 }
	},
	{ name = 'find: (%a, utf8)', func = mw.ustring.find,
	  args = { "bar ¡fóó bar", '¡f%a' },
	  expect = { 5, 7 }
	},
	{ name = 'find: (%a, utf8 2)', func = mw.ustring.find,
	  args = { "bar ¡fóó bar", 'f%a' },
	  expect = { 6, 7 }
	},
	{ name = 'find: (%a+)', func = mw.ustring.find,
	  args = { "bar ¡fóó bar", '¡f%a+' },
	  expect = { 5, 8 }
	},
	{ name = 'find: ([]+)', func = mw.ustring.find,
	  args = { "bar ¡fóo bar", '¡f[oó]+' },
	  expect = { 5, 8 }
	},
	{ name = 'find: ([-]+)', func = mw.ustring.find,
	  args = { "bar ¡fóo bar", '¡f[a-uá-ú]+' },
	  expect = { 5, 8 }
	},
	{ name = 'find: ([-]+ 2)', func = mw.ustring.find,
	  args = { "bar ¡fóo bar", '¡f[a-ú]+' },
	  expect = { 5, 8 }
	},
	{ name = 'find: (%b)', func = mw.ustring.find,
	  args = { "bar ¡<foo <foo> foo> bar", '¡%b<>' },
	  expect = { 5, 20 }
	},
	{ name = 'find: (%b 2)', func = mw.ustring.find,
	  args = { "bar ¡(foo (foo) foo) bar", '¡%b()' },
	  expect = { 5, 20 }
	},
	{ name = 'find: (%b 3)', func = mw.ustring.find,
	  args = { "bar ¡-foo-foo- bar", '¡%b--' },
	  expect = { 5, 10 }
	},
	{ name = 'find: (%b 4)', func = mw.ustring.find,
	  args = { "bar «foo «foo» foo» bar", '%b«»' },
	  expect = { 5, 19 }
	},
	{ name = 'find: (%b 5)', func = mw.ustring.find,
	  args = { "bar !foo !foo¡ foo¡ bar", '%b!¡' },
	  expect = { 5, 19 }
	},
	{ name = 'find: (%b 6)', func = mw.ustring.find,
	  args = { "bar ¡foo ¡foo! foo! bar", '%b¡!' },
	  expect = { 5, 19 }
	},
	{ name = 'find: (%b 7)', func = mw.ustring.find,
	  args = { "bar ¡foo¡foo¡ bar", '%b¡¡' },
	  expect = { 5, 9 }
	},
	{ name = 'find: (%f)', func = mw.ustring.find,
	  args = { "foo ¡foobar ¡foo bar baz", '¡.-%f[%s]' },
	  expect = { 5, 11 }
	},
	{ name = 'find: (%f 2)', func = mw.ustring.find,
	  args = { "foo ¡foobar ¡foo bar baz", '¡foo%f[%s]' },
	  expect = { 13, 16 }
	},
	{ name = 'find: (%f 3)', func = mw.ustring.find,
	  args = { "foo foo¡foobar ¡foo bar baz", '%f[%S]¡.-%f[%s]' },
	  expect = { 16, 19 }
	},
	{ name = 'find: (%f 4)', func = mw.ustring.find,
	  args = { "foo foo¡foobar ¡foo bar baz", '%f[%S]¡.-%f[%s]', 16 },
	  expect = { 16, 19 }
	},
	{ name = 'find: (%f 5)', func = mw.ustring.find,
	  args = { "foo ¡bar baz", '%f[%Z]' },
	  expect = { 1, 0 }
	},
	{ name = 'find: (%f 6)', func = mw.ustring.find,
	  args = { "foo ¡bar baz", '%f[%z]' },
	  expect = { 13, 12 }
	},
	{ name = 'find: (%f 7)', func = mw.ustring.find,
	  args = { "foo ¡b\0r baz", '%f[%Z]', 2 },
	  expect = { 8, 7 }
	},
	{ name = 'find: (%f 8)', func = mw.ustring.find,
	  args = { "\0foo ¡b\0r baz", '%f[%z]' },
	  expect = { 8, 7 }
	},
	{ name = 'find: (%f 9)', func = mw.ustring.find,
	  args = { "\0foo ¡b\0r baz", '%f[%Z]' },
	  expect = { 2, 1 }
	},
	{ name = 'find: (%A)', func = mw.ustring.find,
	  args = { "fóó? bar", '%A+' },
	  expect = { 4, 5 }
	},
	{ name = 'find: (%W)', func = mw.ustring.find,
	  args = { "fóó? bar", '%W+' },
	  expect = { 4, 5 }
	},
	{ name = 'find: ([^])', func = mw.ustring.find,
	  args = { "fóó? bar", '[^a-zó]+' },
	  expect = { 4, 5 }
	},
	{ name = 'find: ([^] 2)', func = mw.ustring.find,
	  args = { "fó0? bar", '[^%a0-9]+' },
	  expect = { 4, 5 }
	},
	{ name = 'find: ([^] 3)', func = mw.ustring.find,
	  args = { "¡fó0% bar", '¡[^%%]+' },
	  expect = { 1, 4 }
	},
	{ name = 'find: ($)', func = mw.ustring.find,
	  args = { "¡foo1 ¡foo2 ¡foo3", '¡foo[0-9]+$' },
	  expect = { 13, 17 }
	},
	{ name = 'find: (.*)', func = mw.ustring.find,
	  args = { "¡foo¡ ¡bar¡ baz", '¡.*¡' },
	  expect = { 1, 11 }
	},
	{ name = 'find: (.-)', func = mw.ustring.find,
	  args = { "¡foo¡ ¡bar¡ baz", '¡.-¡' },
	  expect = { 1, 5 }
	},
	{ name = 'find: plain', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '¡.¡', 1, true },
	  expect = { 5, 7 }
	},
	{ name = 'find: empty delimiter', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '' },
	  expect = { 1, 0 }
	},
	{ name = 'find: empty delimiter (2)', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '', 2 },
	  expect = { 2, 1 }
	},
	{ name = 'find: plain + empty delimiter', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '', 1, true },
	  expect = { 1, 0 }
	},
	{ name = 'find: plain + empty delimiter (2)', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '', 2, true },
	  expect = { 2, 1 }
	},
	{ name = 'find: excessive init', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '()', 20 },
	  expect = { 8, 7, 8 }
	},
	{ name = 'find: excessive init (2)', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '()', -20 },
	  expect = { 1, 0, 1 }
	},
	{ name = 'find: plain + excessive init', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '', 20, true },
	  expect = { 8, 7 }
	},
	{ name = 'find: plain + excessive init (2)', func = mw.ustring.find,
	  args = { "¡a¡ ¡.¡", '', -20, true },
	  expect = { 1, 0 }
	},

	-- find: captures, character-set brackets, inverted ranges
	{ name = 'find: capture (1)', func = mw.ustring.find,
	  args = { "bar ¡foo bar", '(¡foo)' },
	  expect = { 5, 8, '¡foo' }
	},
	{ name = 'find: capture (2)', func = mw.ustring.find,
	  args = { "bar ¡fóo bar", '(¡f%a+)' },
	  expect = { 5, 8, '¡fóo' }
	},
	{ name = 'find: capture (3)', func = mw.ustring.find,
	  args = { "bar ¡fóo bar", '(¡f(%a)%a)' },
	  expect = { 5, 8, '¡fóo', 'ó' }
	},
	{ name = 'find: capture (4)', func = mw.ustring.find,
	  args = { "bar ¡fóo bar", '(¡f(%a-)%a)' },
	  expect = { 5, 7, '¡fó', '' }
	},
	{ name = 'find: capture (5)', func = mw.ustring.find,
	  args = { "bar ¡fóo bar", '()(()¡f()(%a)()%a())()' },
	  expect = { 5, 8, 5, '¡fóo', 5, 7, 'ó', 8, 9, 9 }
	},
	{ name = 'find: capture (6)', func = mw.ustring.find,
	  args = { "fóó", "()(f)()(óó)()" },
	  expect = { 1, 3, 1, 'f', 2, 'óó', 4 }
	},
	{ name = 'find: capture (7)', func = mw.ustring.find,
	  args = { "fóó fóó", "()(f)()(óó)()", 2 },
	  expect = { 5, 7, 5, 'f', 6, 'óó', 8 }
	},
	{ name = 'find: (%1)', func = mw.ustring.find,
	  args = { "foo foofóó foófoó bar", '(f%a+)%1' },
	  expect = { 12, 17, 'foó' }
	},
	{ name = 'find: deceptively-simple pattern', func = mw.ustring.find,
	  args = { "fóó", '([^a-z])' },
	  expect = { 2, 2, 'ó' }
	},
	{ name = 'find: Bracket at start of a character set doesn\'t close', func = mw.ustring.find,
	  args = { "fóó", '()[]' },
	  expect = "Missing close-bracket for character set beginning at pattern character 3"
	},
	{ name = 'find: Bracket at start of a negated character set doesn\'t close', func = mw.ustring.find,
	  args = { "fóó", '()[^]' },
	  expect = "Missing close-bracket for character set beginning at pattern character 3"
	},
	{ name = 'find: Bracket at start of a character set is literal', func = mw.ustring.find,
	  args = { "foo]bar¿", '()([]])' },
	  expect = { 4, 4, 4, ']' }
	},
	{ name = 'find: Bracket at start of a negated character set is literal', func = mw.ustring.find,
	  args = { "]bar¿", '()([^]])' },
	  expect = { 2, 2, 2, 'b' }
	},
	{ name = 'find: Bracket at start of a character set can be a range endpoint', func = mw.ustring.find,
	  args = { "foo]bar¿", '()([]-z]+)' },
	  expect = { 1, 7, 1, 'foo]bar' }
	},
	{ name = 'find: Bracket at start of a negated character can be a range endpoint', func = mw.ustring.find,
	  args = { "fOO]bar¿", '()([^]-z]+)' },
	  expect = { 2, 3, 2, 'OO' }
	},
	{ name = 'find: Weird edge-case that was failing (1)', func = mw.ustring.find,
	  args = { "foo]ba-]r¿", '()([a]-%]+)' },
	  expect = { 4, 4, 4, ']' }
	},
	{ name = 'find: Weird edge-case that was failing (2)', func = mw.ustring.find,
	  args = { "foo¿", '()[!-%]' },
	  expect = "Missing close-bracket for character set beginning at pattern character 3"
	},
	{ name = 'find: Inverted range (1)', func = mw.ustring.find,
	  args = { "foo¿", '()([z-a]+)' },
	  expect = { nil }
	},
	{ name = 'find: Inverted range (2)', func = mw.ustring.find,
	  args = { "foo¿", '()([^z-a]+)' },
	  expect = { 1, 4, 1, 'foo¿' }
	},
	{ name = 'find: Inverted range (3)', func = mw.ustring.find,
	  args = { "foo¿", '()(f[z-a]o)' },
	  expect = { nil }
	},
	{ name = 'find: Inverted range (4)', func = mw.ustring.find,
	  args = { "foo¿", '()(f[z-a]*o)' },
	  expect = { 1, 2, 1, 'fo' }
	},

	-- match
	{ name = 'match: (1)', func = mw.ustring.match,
	  args = { "bar fóo bar", 'f%a+' },
	  expect = { 'fóo' }
	},
	{ name = 'match: (2)', func = mw.ustring.match,
	  args = { "bar fóo bar", 'f(%a+)' },
	  expect = { 'óo' }
	},
	{ name = 'match: empty pattern', func = mw.ustring.match,
	  args = { "¡a¡ ¡.¡", '()' },
	  expect = { 1 }
	},
	{ name = 'match: empty pattern (2)', func = mw.ustring.match,
	  args = { "¡a¡ ¡.¡", '()', 2 },
	  expect = { 2 }
	},
	{ name = 'match: excessive init', func = mw.ustring.match,
	  args = { "¡a¡ ¡.¡", '()', 20 },
	  expect = { 8 }
	},
	{ name = 'match: excessive init (2)', func = mw.ustring.match,
	  args = { "¡a¡ ¡.¡", '()', -20 },
	  expect = { 1 }
	},

	-- gsub
	{ name = 'gsub: (empty string, empty pattern)', func = mw.ustring.gsub,
	  args = { '', '', 'X' },
	  expect = { 'X', 1 }
	},
	{ name = 'gsub: (empty string, one char pattern)', func = mw.ustring.gsub,
	  args = { '', 'á', 'X' },
	  expect = { '', 0 }
	},
	{ name = 'gsub: (one char string, one char pattern)', func = mw.ustring.gsub,
	  args = { 'á', 'á', 'X' },
	  expect = { 'X', 1 }
	},
	{ name = 'gsub: (one char string, empty pattern)', func = mw.ustring.gsub,
	  args = { 'á', '', 'X' },
	  expect = { 'XáX', 2 }
	},
	{ name = 'gsub: (empty pattern with position captures)', func = mw.ustring.gsub,
	  args = { 'ábć', '()', '%1' },
	  expect = { '1á2b3ć4', 4 }
	},
	{ name = 'gsub: (limited to 1 replacement)', func = mw.ustring.gsub,
	  args = { 'áá', 'á', 'X', 1 },
	  expect = { 'Xá', 1 }
	},
	{ name = 'gsub: (limited to 0 replacements)', func = mw.ustring.gsub,
	  args = { 'áá', 'á', 'X', 0 },
	  expect = { 'áá', 0 }
	},
	{ name = 'gsub: (string 1)', func = mw.ustring.gsub,
	  args = { str2, 'f%a+', 'X' },
	  expect = { 'X bar X X baz X X X', 6 }
	},
	{ name = 'gsub: (string 2)', func = mw.ustring.gsub,
	  args = { str3, 'f%a+', 'X' },
	  expect = { '??? X bar X X baz X X X ok?', 6 }
	},
	{ name = 'gsub: (string 3)', func = mw.ustring.gsub,
	  args = { str2, 'f%a+', 'X', 3 },
	  expect = { 'X bar X X baz foooo foofoo fo', 3 }
	},
	{ name = 'gsub: (string 4)', func = mw.ustring.gsub,
	  args = { str3, 'f%a+', 'X', 3 },
	  expect = { '??? X bar X X baz foooo foofoo fo ok?', 3 }
	},
	{ name = 'gsub: (string 5)', func = mw.ustring.gsub,
	  args = { 'foo; fóó', '(f)(%a+)', '%%0=%0 %%1=%1 %%2=%2' },
	  expect = { '%0=foo %1=f %2=oo; %0=fóó %1=f %2=óó', 2 }
	},
	{ name = 'gsub: (anchored)', func = mw.ustring.gsub,
	  args = { 'foofoofoo foo', '^foo', 'X' },
	  expect = { 'Xfoofoo foo', 1 }
	},
	{ name = 'gsub: (table 1)', func = mw.ustring.gsub,
	  args = { str2, 'f%a+', { foo = 'X', ['főó'] = 'Y', ['foó'] = 'Z' } },
	  expect = { 'X bar Y Z baz foooo foofoo fo', 6 }
	},
	{ name = 'gsub: (table 2)', func = mw.ustring.gsub,
	  args = { str3, 'f%a+', { foo = 'X', ['főó'] = 'Y', ['foó'] = 'Z' } },
	  expect = { '??? X bar Y Z baz foooo foofoo fo ok?', 6 }
	},
	{ name = 'gsub: (table 3)', func = mw.ustring.gsub,
	  args = { str2, 'f%a+', { ['főó'] = 'Y', ['foó'] = 'Z' }, 1 },
	  expect = { str2, 1 }
	},
	{ name = 'gsub: (inverted zero character class)', func = mw.ustring.gsub,
	  args = { "ó", '%Z', 'repl' },
	  expect = { 'repl', 1 }
	},
	{ name = 'gsub: (single dot pattern at end)', func = mw.ustring.gsub,
	  args = { "ó", '.', 'repl' },
	  expect = { 'repl', 1 }
	},
	{ name = 'gsub: (single dot pattern at end + leading)', func = mw.ustring.gsub,
	  args = { 'fó', 'f.', 'repl' },
	  expect = { 'repl', 1 }
	},
	{ name = 'gsub: (dot pattern)', func = mw.ustring.gsub,
	  args = { 'f ó b', 'f . b', 'repl' },
	  expect = { 'repl', 1 }
	},
	{ name = 'gsub: (dot pattern with +)', func = mw.ustring.gsub,
	  args = { 'f óóó b', 'f .+ b', 'repl' },
	  expect = { 'repl', 1 }
	},
	{ name = 'gsub: (dot pattern with -)', func = mw.ustring.gsub,
	  args = { 'f óóó b', 'f .- b', 'repl' },
	  expect = { 'repl', 1 }
	},
	{ name = 'gsub: (dot pattern with *)', func = mw.ustring.gsub,
	  args = { 'f óóó b', 'f .* b', 'repl' },
	  expect = { 'repl', 1 }
	},
	{ name = 'gsub: (function 1)', func = mw.ustring.gsub,
	  args = { str2, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end },
	  expect = { '-FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo', 6 }
	},
	{ name = 'gsub: (function 2)', func = mw.ustring.gsub,
	  args = { str3, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end },
	  expect = { '??? -FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo ok?', 6 }
	},
	{ name = 'gsub: invalid replacement string', func = mw.ustring.gsub,
	  args = { 'foo; fóó', '(%a+)', '%2' },
	  expect = "invalid capture index %2 in replacement string"
	},
	{ name = 'gsub: passing numbers instead of strings (1)', func = mw.ustring.gsub,
	  args = { 12345, '[33]', '9' },
	  expect = { '12945', 1 }
	},
	{ name = 'gsub: passing numbers instead of strings (2)', func = mw.ustring.gsub,
	  args = { '12345', 3, '9' },
	  expect = { '12945', 1 }
	},
	{ name = 'gsub: passing numbers instead of strings (3)', func = mw.ustring.gsub,
	  args = { '12345', '[33]', 9 },
	  expect = { '12945', 1 }
	},

	-- gcodepoint
	{ name = 'gcodepoint: basic test', func = mw.ustring.gcodepoint,
	  args = { str1 },
	  expect = { { 0 }, { 0x7f }, { 0x80 }, { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 }, { 0x10ffff } },
	  type = 'Iterator'
	},
	{ name = 'gcodepoint: (4)', func = mw.ustring.gcodepoint,
	  args = { str1, 4 },
	  expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 }, { 0x10ffff } },
	  type = 'Iterator'
	},
	{ name = 'gcodepoint: (4, -2)', func = mw.ustring.gcodepoint,
	  args = { str1, 4, -2 },
	  expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 } },
	  type = 'Iterator'
	},
	{ name = 'gcodepoint: (4, 3)', func = mw.ustring.gcodepoint,
	  args = { str1, 4, 3 },
	  expect = {},
	  type = 'Iterator'
	},
	{ name = 'gcodepoint: (1, 0)', func = mw.ustring.gcodepoint,
	  args = { str1, 1, 0 },
	  expect = {},
	  type = 'Iterator'
	},
	{ name = 'gcodepoint: (9, 9)', func = mw.ustring.gcodepoint,
	  args = { str1, 9, 9 },
	  expect = {},
	  type = 'Iterator'
	},
	{ name = 'gcodepoint: really long string', func = testLongGcodepoint,
	  args = {},
	  expect = { {
		  [1] = 0x66, [2] = 0x151, [3] = 0xf3, [4] = 0x20,
		  [9997] = 0x66, [9998] = 0x151, [9999] = 0xf3, [10000] = 0x20,
	  } },
	},

	-- gmatch
	{ name = 'gmatch: test string 1', func = mw.ustring.gmatch,
	  args = { str2, 'f%a+' },
	  expect = { { 'foo' }, { 'főó' }, { 'foó' }, { 'foooo' }, { 'foofoo' }, { 'fo' } },
	  type = 'Iterator'
	},
	{ name = 'gmatch: test string 2', func = mw.ustring.gmatch,
	  args = { str3, 'f%a+' },
	  expect = { { 'foo' }, { 'főó' }, { 'foó' }, { 'foooo' }, { 'foofoo' }, { 'fo' } },
	  type = 'Iterator'
	},
	{ name = 'gmatch: anchored', func = mw.ustring.gmatch,
	  args = { "fóó1 ^fóó2 fóó3 ^fóó4", '^fóó%d+' },
	  expect = { { "^fóó2" }, { "^fóó4" } },
	  type = 'Iterator'
	},

	-- Error messages for an unclosed character set
	{ name = 'find: Pure-lua version, non-native error message', func = mw.ustring.find,
	  args = { "fóó", '[]' },
	  expect = "Missing close-bracket for character set beginning at pattern character 1"
	},
	{ name = 'match: Pure-lua version, non-native error message', func = mw.ustring.match,
	  args = { "fóó", '[]' },
	  expect = "Missing close-bracket for character set beginning at pattern character 1"
	},
	{ name = 'gsub: Pure-lua version, non-native error message', func = mw.ustring.gsub,
	  args = { "fóó", '[]', '' },
	  expect = "Missing close-bracket for character set beginning at pattern character 1"
	},

	-- Length limits on subject strings and patterns
	{ name = 'string length limit',
	  func = function ()
		  local s = string.rep( "x", mw.ustring.maxStringLength + 1 )
		  local ret = { mw.ustring.gsub( s, 'a', 'b' ) }
		  -- So the output isn't insanely long
		  ret[1] = string.gsub( ret[1], 'xxxxx(x*)', function ( m )
			  return 'xxxxx[snip ' .. #m .. ' more]'
		  end )
		  return unpack( ret )
	  end,
	  expect = "bad argument #1 to 'gsub' (string is longer than " .. mw.ustring.maxStringLength .. " bytes)"
	},
	{ name = 'pattern length limit',
	  func = function ()
		  local pattern = string.rep( "x", mw.ustring.maxPatternLength + 1 )
		  return mw.ustring.gsub( 'a', pattern, 'b' )
	  end,
	  expect = "bad argument #2 to 'gsub' (pattern is longer than " .. mw.ustring.maxPatternLength .. " bytes)"
	},
} )
|