mediawiki-extensions-Scribunto/tests/phpunit/Engines/LuaCommon/UstringLibraryTests.lua
Reedy 1eecdac6de Capitalise Engines folder
Change-Id: I6d730d67decc859fd130fee5ec92b1cfb8d9ef64
2022-09-30 00:58:27 +00:00

771 lines
25 KiB
Lua
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

local testframework = require 'Module:TestFramework'
-- Shared fixtures used by the test cases below.
-- str1: the lowest and highest codepoint of each UTF-8 encoded length (1 to 4 bytes), back to back.
local str1 = "\0\127\194\128\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191"
-- str2: plain ASCII words mixed with accented ones.
local str2 = "foo bar főó foó baz foooo foofoo fo"
-- str3: the text of str2 with punctuation before and after it.
local str3 = "??? foo bar főó foó baz foooo foofoo fo ok?"
-- str4: 2500 copies of the four-character unit "főó ", i.e. 10000 characters in total.
local str4 = string.rep( "főó ", 10000 / 4 )
-- Iterates over every codepoint of str4 with mw.ustring.gcodepoint and keeps
-- only the entries at positions 1-4 and 9997 onwards, indexed by position, so
-- the returned table stays small while both ends of the iteration are covered.
local function testLongGcodepoint()
	local kept = {}
	local position = 0
	for codepoint in mw.ustring.gcodepoint( str4 ) do
		position = position + 1
		-- Skip the long middle section (positions 5 through 9996)
		if not ( position > 4 and position <= 9996 ) then
			kept[position] = codepoint
		end
	end
	return kept
end
-- The module returns a test provider built from a list of cases. Each case
-- carries a name, the function under test, its arguments, and `expect`: a
-- table of values, or a string in the cases that exercise an error message.
-- (How these are compared is up to Module:TestFramework, not visible here.)
return testframework.getTestProvider( {
-- isutf8: one well-formed string followed by malformed byte sequences.
-- The valid string uses the same boundary characters as str1, separated by spaces.
{ name = 'isutf8: valid string', func = mw.ustring.isutf8,
args = { "\0 \127 \194\128 \223\191 \224\160\128 \239\191\191 \240\144\128\128 \244\143\191\191" },
expect = { true }
},
-- These bytes would decode to 0x110000, one past the 0x10ffff maximum
{ name = 'isutf8: out of range character', func = mw.ustring.isutf8,
args = { "\244\144\128\128" },
expect = { false }
},
-- A four-byte lead byte followed by only two continuation bytes
{ name = 'isutf8: insufficient continuation bytes', func = mw.ustring.isutf8,
args = { "\240\128\128" },
expect = { false }
},
-- A two-byte lead byte followed by two continuation bytes
{ name = 'isutf8: excess continuation bytes', func = mw.ustring.isutf8,
args = { "\194\128\128" },
expect = { false }
},
{ name = 'isutf8: bare continuation byte', func = mw.ustring.isutf8,
args = { "\128" },
expect = { false }
},
-- Two-byte forms of 0x00 and 0x7f, which fit in a single byte
{ name = 'isutf8: overlong encoding', func = mw.ustring.isutf8,
args = { "\192\128" },
expect = { false }
},
{ name = 'isutf8: overlong encoding (2)', func = mw.ustring.isutf8,
args = { "\193\191" },
expect = { false }
},
-- byteoffset: in "fóo" the 'ó' occupies bytes 2-3, so character 3 starts at byte 4.
-- With a single number n, the expectation is the byte at which character n starts.
{ name = 'byteoffset: (1)', func = mw.ustring.byteoffset,
args = { "fóo", 1 },
expect = { 1 }
},
{ name = 'byteoffset: (2)', func = mw.ustring.byteoffset,
args = { "fóo", 2 },
expect = { 2 }
},
{ name = 'byteoffset: (3)', func = mw.ustring.byteoffset,
args = { "fóo", 3 },
expect = { 4 }
},
{ name = 'byteoffset: (4)', func = mw.ustring.byteoffset,
args = { "fóo", 4 },
expect = { nil }
},
-- With 0 as the second argument, the expectation is the first byte of the
-- character that contains the byte given as third argument (byte 3 -> 2).
{ name = 'byteoffset: (0,1)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 1 },
expect = { 1 }
},
{ name = 'byteoffset: (0,2)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 2 },
expect = { 2 }
},
{ name = 'byteoffset: (0,3)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 3 },
expect = { 2 }
},
{ name = 'byteoffset: (0,4)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 4 },
expect = { 4 }
},
{ name = 'byteoffset: (0,5)', func = mw.ustring.byteoffset,
args = { "fóo", 0, 5 },
expect = { nil }
},
-- A negative third argument counts from the end of the string. The two
-- subjects differ in where the two-byte 'ó' sits ("fóo" versus "foó").
{ name = 'byteoffset: (0,-1)', func = mw.ustring.byteoffset,
args = { "fóo", 0, -1 },
expect = { 4 }
},
{ name = 'byteoffset: (0,-1)', func = mw.ustring.byteoffset,
args = { "foó", 0, -1 },
expect = { 3 }
},
{ name = 'byteoffset: (1,-1)', func = mw.ustring.byteoffset,
args = { "fóo", 1, -1 },
expect = { 4 }
},
{ name = 'byteoffset: (1,-1)', func = mw.ustring.byteoffset,
args = { "foó", 1, -1 },
expect = { nil }
},
-- codepoint: str1 holds eight characters, so index 9 is past the end and
-- ranges whose end precedes their start are expected to yield nothing.
{ name = 'codepoint: whole string', func = mw.ustring.codepoint,
args = { str1, 1, -1 },
expect = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff }
},
{ name = 'codepoint: substring', func = mw.ustring.codepoint,
args = { str1, 5, -2 },
expect = { 0x800, 0xffff, 0x10000 }
},
{ name = 'codepoint: (5,4)', func = mw.ustring.codepoint,
args = { str1, 5, 4 },
expect = {}
},
{ name = 'codepoint: (1,0)', func = mw.ustring.codepoint,
args = { str1, 1, 0 },
expect = {}
},
{ name = 'codepoint: (9,9)', func = mw.ustring.codepoint,
args = { str1, 9, 9 },
expect = {}
},
-- Position 9000 is the fourth character of a "főó " unit, hence the leading
-- and trailing space (0x20) in the expectation.
-- NOTE(review): str4 has 10000 characters, so 9000-9004 is deep into the
-- string but not its very end, despite the test name - confirm the intent.
{ name = 'codepoint: end of a really long string', func = mw.ustring.codepoint,
args = { str4, 9000, 9004 },
expect = { 0x20, 0x66, 0x151, 0xf3, 0x20 }
},
-- char: the inverse of the whole-string codepoint case above
{ name = 'char: basic test', func = mw.ustring.char,
args = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff },
expect = { str1 }
},
-- A string-valued expect names the error message the call should produce
{ name = 'char: invalid codepoint', func = mw.ustring.char,
args = { 0x110000 },
expect = "bad argument #1 to 'char' (value out of range)"
},
{ name = 'char: invalid value', func = mw.ustring.char,
args = { 'foo' },
expect = "bad argument #1 to 'char' (number expected, got string)"
},
-- len: counts characters, not bytes (str1 is 20 bytes long)
{ name = 'len: basic test', func = mw.ustring.len,
args = { str1 },
expect = { 8 }
},
-- Same bytes as the 'isutf8: out of range character' case
{ name = 'len: invalid string', func = mw.ustring.len,
args = { "\244\144\128\128" },
expect = { nil }
},
-- sub: indexes are character positions within the eight-character str1;
-- negative indexes count back from the end.
{ name = 'sub: (4)', func = mw.ustring.sub,
args = { str1, 4 },
expect = { "\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191" }
},
{ name = 'sub: (4,7)', func = mw.ustring.sub,
args = { str1, 4, 7 },
expect = { "\223\191\224\160\128\239\191\191\240\144\128\128" }
},
{ name = 'sub: (4,-1)', func = mw.ustring.sub,
args = { str1, 4, -1 },
expect = { "\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191" }
},
{ name = 'sub: (4,-2)', func = mw.ustring.sub,
args = { str1, 4, -2 },
expect = { "\223\191\224\160\128\239\191\191\240\144\128\128" }
},
{ name = 'sub: (-2)', func = mw.ustring.sub,
args = { str1, -2 },
expect = { "\240\144\128\128\244\143\191\191" }
},
-- Out-of-range and empty ranges are expected to give the empty string
{ name = 'sub: (9)', func = mw.ustring.sub,
args = { str1, 9 },
expect = { "" }
},
{ name = 'sub: (0)', func = mw.ustring.sub,
args = { str1, 0 },
expect = { str1 }
},
{ name = 'sub: (4,3)', func = mw.ustring.sub,
args = { str1, 4, 3 },
expect = { "" }
},
{ name = 'sub: (1,0)', func = mw.ustring.sub,
args = { str2, 1, 0 },
expect = { "" }
},
{ name = 'sub: (5,5)', func = mw.ustring.sub,
args = { str1, 5, 5 },
expect = { "\224\160\128" }
},
{ name = 'sub: (9,9)', func = mw.ustring.sub,
args = { str1, 9, 9 },
expect = { "" }
},
{ name = 'sub: empty string', func = mw.ustring.sub,
args = { '', 5 },
expect = { "" }
},
-- upper / lower: accented letters change case too; '?' is left alone
{ name = 'upper: basic test', func = mw.ustring.upper,
args = { "fóó?" },
expect = { "FÓÓ?" }
},
{ name = 'lower: basic test', func = mw.ustring.lower,
args = { "FÓÓ?" },
expect = { "fóó?" }
},
-- find: expected positions are character indexes, not byte offsets. The
-- subjects contain multibyte characters such as '¡' so that the two differ.
{ name = 'find: (simple)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡foo' },
expect = { 5, 8 }
},
-- '%+' is an escaped, literal plus sign
{ name = 'find: (%)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fo%+' },
expect = { }
},
{ name = 'find: (%)', func = mw.ustring.find,
args = { "bar ¡fo+ bar", '¡fo%+' },
expect = { 5, 8 }
},
-- Repetition operators: + ? * and the lazy -
{ name = 'find: (+)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fo+' },
expect = { 5, 8 }
},
{ name = 'find: (+) (2)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fx+o+' },
expect = {}
},
{ name = 'find: (?)', func = mw.ustring.find,
args = { "bar ¡foox bar", '¡foox?' },
expect = { 5, 9 }
},
{ name = 'find: (?) (2)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡foox?' },
expect = { 5, 8 }
},
{ name = 'find: (*)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fx*oo' },
expect = { 5, 8 }
},
{ name = 'find: (-)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fo-' },
expect = { 5, 6 }
},
{ name = 'find: (-)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡fo-o' },
expect = { 5, 7 }
},
{ name = 'find: (-)', func = mw.ustring.find,
args = { "bar ¡foox bar", '¡fo-x' },
expect = { 5, 9 }
},
-- Character classes and sets: %a is expected to accept accented letters,
-- and set ranges may have multibyte endpoints
{ name = 'find: (%a)', func = mw.ustring.find,
args = { "bar ¡foo bar", '¡f%a' },
expect = { 5, 7 }
},
{ name = 'find: (%a, utf8)', func = mw.ustring.find,
args = { "bar ¡fóó bar", '¡f%a' },
expect = { 5, 7 }
},
{ name = 'find: (%a, utf8 2)', func = mw.ustring.find,
args = { "bar ¡fóó bar", 'f%a' },
expect = { 6, 7 }
},
{ name = 'find: (%a+)', func = mw.ustring.find,
args = { "bar ¡fóó bar", '¡f%a+' },
expect = { 5, 8 }
},
{ name = 'find: ([]+)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '¡f[oó]+' },
expect = { 5, 8 }
},
{ name = 'find: ([-]+)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '¡f[a-uá-ú]+' },
expect = { 5, 8 }
},
{ name = 'find: ([-]+ 2)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '¡f[a-ú]+' },
expect = { 5, 8 }
},
-- %b balanced matching, including multibyte delimiters and cases where the
-- opening and closing delimiter are the same character
{ name = 'find: (%b)', func = mw.ustring.find,
args = { "bar ¡<foo <foo> foo> bar", '¡%b<>' },
expect = { 5, 20 }
},
{ name = 'find: (%b 2)', func = mw.ustring.find,
args = { "bar ¡(foo (foo) foo) bar", '¡%b()' },
expect = { 5, 20 }
},
{ name = 'find: (%b 3)', func = mw.ustring.find,
args = { "bar ¡-foo-foo- bar", '¡%b--' },
expect = { 5, 10 }
},
{ name = 'find: (%b 4)', func = mw.ustring.find,
args = { "bar «foo «foo» foo» bar", '%b«»' },
expect = { 5, 19 }
},
{ name = 'find: (%b 5)', func = mw.ustring.find,
args = { "bar !foo !foo¡ foo¡ bar", '%b!¡' },
expect = { 5, 19 }
},
{ name = 'find: (%b 6)', func = mw.ustring.find,
args = { "bar ¡foo ¡foo! foo! bar", '%b¡!' },
expect = { 5, 19 }
},
{ name = 'find: (%b 7)', func = mw.ustring.find,
args = { "bar ¡foo¡foo¡ bar", '%b¡¡' },
expect = { 5, 9 }
},
-- %f frontier patterns. A frontier is zero-width, so a match consisting
-- only of a frontier is reported with an end position one less than its start
{ name = 'find: (%f)', func = mw.ustring.find,
args = { "foo ¡foobar ¡foo bar baz", '¡.-%f[%s]' },
expect = { 5, 11 }
},
{ name = 'find: (%f 2)', func = mw.ustring.find,
args = { "foo ¡foobar ¡foo bar baz", '¡foo%f[%s]' },
expect = { 13, 16 }
},
{ name = 'find: (%f 3)', func = mw.ustring.find,
args = { "foo foo¡foobar ¡foo bar baz", '%f[%S]¡.-%f[%s]' },
expect = { 16, 19 }
},
{ name = 'find: (%f 4)', func = mw.ustring.find,
args = { "foo foo¡foobar ¡foo bar baz", '%f[%S]¡.-%f[%s]', 16 },
expect = { 16, 19 }
},
{ name = 'find: (%f 5)', func = mw.ustring.find,
args = { "foo ¡bar baz", '%f[%Z]' },
expect = { 1, 0 }
},
{ name = 'find: (%f 6)', func = mw.ustring.find,
args = { "foo ¡bar baz", '%f[%z]' },
expect = { 13, 12 }
},
{ name = 'find: (%f 7)', func = mw.ustring.find,
args = { "foo ¡b\0r baz", '%f[%Z]', 2 },
expect = { 8, 7 }
},
{ name = 'find: (%f 8)', func = mw.ustring.find,
args = { "\0foo ¡b\0r baz", '%f[%z]' },
expect = { 8, 7 }
},
{ name = 'find: (%f 9)', func = mw.ustring.find,
args = { "\0foo ¡b\0r baz", '%f[%Z]' },
expect = { 2, 1 }
},
-- Complemented classes and negated sets
{ name = 'find: (%A)', func = mw.ustring.find,
args = { "fóó? bar", '%A+' },
expect = { 4, 5 }
},
{ name = 'find: (%W)', func = mw.ustring.find,
args = { "fóó? bar", '%W+' },
expect = { 4, 5 }
},
{ name = 'find: ([^])', func = mw.ustring.find,
args = { "fóó? bar", '[^a-zó]+' },
expect = { 4, 5 }
},
{ name = 'find: ([^] 2)', func = mw.ustring.find,
args = { "fó0? bar", '[^%a0-9]+' },
expect = { 4, 5 }
},
{ name = 'find: ([^] 3)', func = mw.ustring.find,
args = { "¡fó0% bar", '¡[^%%]+' },
expect = { 1, 4 }
},
-- End anchor, greedy versus lazy wildcard
{ name = 'find: ($)', func = mw.ustring.find,
args = { "¡foo1 ¡foo2 ¡foo3", '¡foo[0-9]+$' },
expect = { 13, 17 }
},
{ name = 'find: (.*)', func = mw.ustring.find,
args = { "¡foo¡ ¡bar¡ baz", '¡.*¡' },
expect = { 1, 11 }
},
{ name = 'find: (.-)', func = mw.ustring.find,
args = { "¡foo¡ ¡bar¡ baz", '¡.-¡' },
expect = { 1, 5 }
},
-- Plain mode (fourth argument true), empty patterns, and init values that
-- lie outside the seven-character subject
{ name = 'find: plain', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '¡.¡', 1, true },
expect = { 5, 7 }
},
{ name = 'find: empty delimiter', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '' },
expect = { 1, 0 }
},
{ name = 'find: empty delimiter (2)', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', 2 },
expect = { 2, 1 }
},
{ name = 'find: plain + empty delimiter', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', 1, true },
expect = { 1, 0 }
},
{ name = 'find: plain + empty delimiter (2)', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', 2, true },
expect = { 2, 1 }
},
{ name = 'find: excessive init', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '()', 20 },
expect = { 8, 7, 8 }
},
{ name = 'find: excessive init (2)', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '()', -20 },
expect = { 1, 0, 1 }
},
{ name = 'find: plain + excessive init', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', 20, true },
expect = { 8, 7 }
},
{ name = 'find: plain + excessive init', func = mw.ustring.find,
args = { "¡a¡ ¡.¡", '', -20, true },
expect = { 1, 0 }
},
-- Captures: values after the start/end pair are the captures in order;
-- '()' position captures are character indexes as well
{ name = 'find: capture (1)', func = mw.ustring.find,
args = { "bar ¡foo bar", '(¡foo)' },
expect = { 5, 8, '¡foo' }
},
{ name = 'find: capture (2)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '(¡f%a+)' },
expect = { 5, 8, '¡fóo' }
},
{ name = 'find: capture (3)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '(¡f(%a)%a)' },
expect = { 5, 8, '¡fóo', 'ó' }
},
{ name = 'find: capture (4)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '(¡f(%a-)%a)' },
expect = { 5, 7, '¡fó', '' }
},
{ name = 'find: capture (5)', func = mw.ustring.find,
args = { "bar ¡fóo bar", '()(()¡f()(%a)()%a())()' },
expect = { 5, 8, 5, '¡fóo', 5, 7, 'ó', 8, 9, 9 }
},
{ name = 'find: capture (6)', func = mw.ustring.find,
args = { "fóó", "()(f)()(óó)()" },
expect = { 1, 3, 1, 'f', 2, 'óó', 4 }
},
{ name = 'find: capture (7)', func = mw.ustring.find,
args = { "fóó fóó", "()(f)()(óó)()", 2 },
expect = { 5, 7, 5, 'f', 6, 'óó', 8 }
},
-- Back-reference: the first spot where a captured word is immediately repeated
{ name = 'find: (%1)', func = mw.ustring.find,
args = { "foo foofóó foófoó bar", '(f%a+)%1' },
expect = { 12, 17, 'foó' }
},
{ name = 'find: deceptively-simple pattern', func = mw.ustring.find,
args = { "fóó", '([^a-z])' },
expect = { 2, 2, 'ó' }
},
-- ']' directly after '[' or '[^' is a literal member of the set, so a set
-- containing nothing else is reported as unterminated
{ name = 'find: Bracket at start of a character set doesn\'t close', func = mw.ustring.find,
args = { "fóó", '()[]' },
expect = "Missing close-bracket for character set beginning at pattern character 3"
},
{ name = 'find: Bracket at start of a negated character set doesn\'t close', func = mw.ustring.find,
args = { "fóó", '()[^]' },
expect = "Missing close-bracket for character set beginning at pattern character 3"
},
{ name = 'find: Bracket at start of a character set is literal', func = mw.ustring.find,
args = { "foo]bar¿", '()([]])' },
expect = { 4, 4, 4, ']' }
},
{ name = 'find: Bracket at start of a negated character set is literal', func = mw.ustring.find,
args = { "]bar¿", '()([^]])' },
expect = { 2, 2, 2, 'b' }
},
{ name = 'find: Bracket at start of a character set can be a range endpoint', func = mw.ustring.find,
args = { "foo]bar¿", '()([]-z]+)' },
expect = { 1, 7, 1, 'foo]bar' }
},
{ name = 'find: Bracket at start of a negated character can be a range endpoint', func = mw.ustring.find,
args = { "fOO]bar¿", '()([^]-z]+)' },
expect = { 2, 3, 2, 'OO' }
},
{ name = 'find: Weird edge-case that was failing (1)', func = mw.ustring.find,
args = { "foo]ba-]r¿", '()([a]-%]+)' },
expect = { 4, 4, 4, ']' }
},
{ name = 'find: Weird edge-case that was failing (2)', func = mw.ustring.find,
args = { "foo¿", '()[!-%]' },
expect = "Missing close-bracket for character set beginning at pattern character 3"
},
-- A range whose endpoints are reversed ('z-a') is expected to match no
-- character at all, so its negation matches every character
{ name = 'find: Inverted range (1)', func = mw.ustring.find,
args = { "foo¿", '()([z-a]+)' },
expect = { nil }
},
{ name = 'find: Inverted range (2)', func = mw.ustring.find,
args = { "foo¿", '()([^z-a]+)' },
expect = { 1, 4, 1, 'foo¿' }
},
{ name = 'find: Inverted range (3)', func = mw.ustring.find,
args = { "foo¿", '()(f[z-a]o)' },
expect = { nil }
},
{ name = 'find: Inverted range (4)', func = mw.ustring.find,
args = { "foo¿", '()(f[z-a]*o)' },
expect = { 1, 2, 1, 'fo' }
},
-- match: returns the whole match when the pattern has no captures, and the
-- captures otherwise; the '()' cases mirror the find init cases above
{ name = 'match: (1)', func = mw.ustring.match,
args = { "bar fóo bar", 'f%a+' },
expect = { 'fóo' }
},
{ name = 'match: (2)', func = mw.ustring.match,
args = { "bar fóo bar", 'f(%a+)' },
expect = { 'óo' }
},
{ name = 'match: empty pattern', func = mw.ustring.match,
args = { "¡a¡ ¡.¡", '()' },
expect = { 1 }
},
{ name = 'match: empty pattern (2)', func = mw.ustring.match,
args = { "¡a¡ ¡.¡", '()', 2 },
expect = { 2 }
},
-- The subject has seven characters, so an init of 20 is expected to behave
-- like position 8 and an init of -20 like position 1
{ name = 'match: excessive init', func = mw.ustring.match,
args = { "¡a¡ ¡.¡", '()', 20 },
expect = { 8 }
},
{ name = 'match: excessive init (2)', func = mw.ustring.match,
args = { "¡a¡ ¡.¡", '()', -20 },
expect = { 1 }
},
-- gsub: each expectation is the resulting string followed by the number of
-- matches. An empty pattern matches once at every character boundary,
-- including the position after the last character.
{ name = 'gsub: (empty string, empty pattern)', func = mw.ustring.gsub,
args = { '', '', 'X' },
expect = { 'X', 1 }
},
{ name = 'gsub: (empty string, one char pattern)', func = mw.ustring.gsub,
args = { '', 'á', 'X' },
expect = { '', 0 }
},
{ name = 'gsub: (one char string, one char pattern)', func = mw.ustring.gsub,
args = { 'á', 'á', 'X' },
expect = { 'X', 1 }
},
{ name = 'gsub: (one char string, empty pattern)', func = mw.ustring.gsub,
args = { 'á', '', 'X' },
expect = { 'XáX', 2 }
},
-- Position captures are character indexes: 1 through 4 for a three-character subject
{ name = 'gsub: (empty pattern with position captures)', func = mw.ustring.gsub,
args = { 'ábć', '()', '%1' },
expect = { '1á2b3ć4', 4 }
},
-- With a limit of 1 only the first 'á' is replaced by 'X'; the second 'á'
-- must survive untouched, giving 'Xá' with a match count of 1.
{ name = 'gsub: (limited to 1 replacement)', func = mw.ustring.gsub,
args = { 'áá', 'á', 'X', 1 },
expect = { 'Xá', 1 }
},
-- A limit of 0 leaves the subject untouched and reports no matches
{ name = 'gsub: (limited to 0 replacements)', func = mw.ustring.gsub,
args = { 'áá', 'á', 'X', 0 },
expect = { 'áá', 0 }
},
-- String replacements over str2 / str3, without and with a limit of 3
{ name = 'gsub: (string 1)', func = mw.ustring.gsub,
args = { str2, 'f%a+', 'X' },
expect = { 'X bar X X baz X X X', 6 }
},
{ name = 'gsub: (string 2)', func = mw.ustring.gsub,
args = { str3, 'f%a+', 'X' },
expect = { '??? X bar X X baz X X X ok?', 6 }
},
{ name = 'gsub: (string 3)', func = mw.ustring.gsub,
args = { str2, 'f%a+', 'X', 3 },
expect = { 'X bar X X baz foooo foofoo fo', 3 }
},
{ name = 'gsub: (string 4)', func = mw.ustring.gsub,
args = { str3, 'f%a+', 'X', 3 },
expect = { '??? X bar X X baz foooo foofoo fo ok?', 3 }
},
-- In a replacement string '%%' is a literal percent sign, %0 the whole
-- match, and %1 / %2 the captures
{ name = 'gsub: (string 5)', func = mw.ustring.gsub,
args = { 'foo; fóó', '(f)(%a+)', '%%0=%0 %%1=%1 %%2=%2' },
expect = { '%0=foo %1=f %2=oo; %0=fóó %1=f %2=óó', 2 }
},
{ name = 'gsub: string, undocumented behavior where %1 works as %0 if there are no captures', func = mw.ustring.gsub,
args = { 'foo; fóó', '%a+', '%1!' },
expect = { 'foo!; fóó!', 2 }
},
-- '^' anchors the pattern, so only the leading 'foo' is replaced
{ name = 'gsub: (anchored)', func = mw.ustring.gsub,
args = { 'foofoofoo foo', '^foo', 'X' },
expect = { 'Xfoofoo foo', 1 }
},
-- Table replacements: a match with no entry in the table is kept as is but
-- is still included in the returned count
{ name = 'gsub: (table 1)', func = mw.ustring.gsub,
args = { str2, 'f%a+', { foo = 'X', ['főó'] = 'Y', ['foó'] = 'Z' } },
expect = { 'X bar Y Z baz foooo foofoo fo', 6 }
},
{ name = 'gsub: (table 2)', func = mw.ustring.gsub,
args = { str3, 'f%a+', { foo = 'X', ['főó'] = 'Y', ['foó'] = 'Z' } },
expect = { '??? X bar Y Z baz foooo foofoo fo ok?', 6 }
},
-- The single permitted match is 'foo', which has no table entry
{ name = 'gsub: (table 3)', func = mw.ustring.gsub,
args = { str2, 'f%a+', { ['főó'] = 'Y', ['foó'] = 'Z' }, 1 },
expect = { str2, 1 }
},
-- A table replacement is looked up by the first capture. In "table 4" that
-- capture is the text after the leading 'f', so the word 'foó' needs an 'oó'
-- key to produce the 'Z' that the expectation requires.
{ name = 'gsub: (table 4)', func = mw.ustring.gsub,
args = { str3, 'f(%a+)', { oo = 'X', ['őó'] = 'Y', ['oó'] = 'Z' } },
expect = { '??? X bar Y Z baz foooo foofoo fo ok?', 6 }
},
-- In "table 5" the first capture is always 'f', so every match becomes 'F'.
-- The keys for the second capture (the same ones as in "table 4") are decoys
-- that must never be used.
{ name = 'gsub: (table 5)', func = mw.ustring.gsub,
args = { str3, '(f)(%a+)', { f = 'F', oo = 'X', ['őó'] = 'Y', ['oó'] = 'Z' } },
expect = { '??? F bar F F baz F F F ok?', 6 }
},
-- 'ó' is a single multibyte character: %Z and '.' must each consume it whole,
-- giving exactly one replacement
{ name = 'gsub: (inverted zero character class)', func = mw.ustring.gsub,
args = { "ó", '%Z', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (single dot pattern at end)', func = mw.ustring.gsub,
args = { "ó", '.', 'repl' },
expect = { 'repl', 1 }
},
-- 'f.' can only match a subject that has a character after the leading 'f'.
-- The dot must consume the whole multibyte 'ó', so the entire subject is
-- replaced in a single match.
{ name = 'gsub: (single dot pattern at end + leading)', func = mw.ustring.gsub,
args = { 'fó', 'f.', 'repl' },
expect = { 'repl', 1 }
},
-- '.' with and without repetition operators across multibyte characters;
-- in every case the whole subject is one match
{ name = 'gsub: (dot pattern)', func = mw.ustring.gsub,
args = { 'f ó b', 'f . b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with +)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .+ b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with -)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .- b', 'repl' },
expect = { 'repl', 1 }
},
{ name = 'gsub: (dot pattern with *)', func = mw.ustring.gsub,
args = { 'f óóó b', 'f .* b', 'repl' },
expect = { 'repl', 1 }
},
-- Function replacements: returning nil keeps the original match (the final
-- 'fo' stays as is) while the match is still counted
{ name = 'gsub: (function 1)', func = mw.ustring.gsub,
args = { str2, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end },
expect = { '-FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo', 6 }
},
{ name = 'gsub: (function 2)', func = mw.ustring.gsub,
args = { str3, 'f%a+', function(m) if m == 'fo' then return nil end return '-' .. mw.ustring.upper(m) .. '-' end },
expect = { '??? -FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo ok?', 6 }
},
-- With two captures the function receives both as separate arguments
{ name = 'gsub: (function 3)', func = mw.ustring.gsub,
args = { str3, '(f)(%a+)', function(m1, m2) if m2 == 'o' then return nil end return '-' .. m1 .. mw.ustring.upper(m2) .. '-' end },
expect = { '??? -fOO- bar -fŐÓ- -fOÓ- baz -fOOOO- -fOOFOO- fo ok?', 6 }
},
-- The pattern has one capture, so %2 in the replacement is invalid
{ name = 'gsub: invalid replacement string', func = mw.ustring.gsub,
args = { 'foo; fóó', '(%a+)', '%2' },
expect = "invalid capture index %2 in replacement string"
},
-- Numbers are accepted in place of the subject, the pattern, or the replacement
{ name = 'gsub: passing numbers instead of strings (1)', func = mw.ustring.gsub,
args = { 12345, '[3]', '9' },
expect = { '12945', 1 }
},
{ name = 'gsub: passing numbers instead of strings (2)', func = mw.ustring.gsub,
args = { '12345', 3, '9' },
expect = { '12945', 1 }
},
{ name = 'gsub: passing numbers instead of strings (3)', func = mw.ustring.gsub,
args = { '12345', '[3]', 9 },
expect = { '12945', 1 }
},
-- Replacement values of an unsupported type, whether looked up in a table
-- or returned by a function, are expected to raise an error naming the type
{ name = 'gsub: table replacement with a bad type (boolean)', func = mw.ustring.gsub,
args = { 'abc', 'b', { b = true } },
expect = 'invalid replacement value (a boolean)'
},
{ name = 'gsub: table replacement with a bad type (table)', func = mw.ustring.gsub,
args = { 'abc', 'b', { b = {} } },
expect = 'invalid replacement value (a table)'
},
{ name = 'gsub: table replacement with a bad type (function)', func = mw.ustring.gsub,
args = { 'abc', 'b', { b = function () end } },
expect = 'invalid replacement value (a function)'
},
{ name = 'gsub: function replacement with a bad type (boolean)', func = mw.ustring.gsub,
args = { 'abc', 'b', function () return true end },
expect = 'invalid replacement value (a boolean)'
},
{ name = 'gsub: function replacement with a bad type (table)', func = mw.ustring.gsub,
args = { 'abc', 'b', function () return {} end },
expect = 'invalid replacement value (a table)'
},
{ name = 'gsub: function replacement with a bad type (function)', func = mw.ustring.gsub,
args = { 'abc', 'b', function () return function () end end },
expect = 'invalid replacement value (a function)'
},
-- gcodepoint: cases marked type = 'Iterator' list one inner table per
-- expected iteration step.
-- NOTE(review): presumably Module:TestFramework drives the returned iterator
-- and collects each step's values - confirm against the framework.
{ name = 'gcodepoint: basic test', func = mw.ustring.gcodepoint,
args = { str1 },
expect = { { 0 }, { 0x7f }, { 0x80 }, { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 }, { 0x10ffff } },
type = 'Iterator'
},
{ name = 'gcodepoint: (4)', func = mw.ustring.gcodepoint,
args = { str1, 4 },
expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 }, { 0x10ffff } },
type = 'Iterator'
},
{ name = 'gcodepoint: (4, -2)', func = mw.ustring.gcodepoint,
args = { str1, 4, -2 },
expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 } },
type = 'Iterator'
},
-- Empty and out-of-range ranges are expected to produce no iterations
{ name = 'gcodepoint: (4, 3)', func = mw.ustring.gcodepoint,
args = { str1, 4, 3 },
expect = {},
type = 'Iterator'
},
{ name = 'gcodepoint: (1, 0)', func = mw.ustring.gcodepoint,
args = { str1, 1, 0 },
expect = {},
type = 'Iterator'
},
{ name = 'gcodepoint: (9, 9)', func = mw.ustring.gcodepoint,
args = { str1, 9, 9 },
expect = {},
type = 'Iterator'
},
-- testLongGcodepoint walks all of str4 itself and returns only the first
-- and last four codepoints, keyed by their position
{ name = 'gcodepoint: really long string', func = testLongGcodepoint,
args = {},
expect = { {
[1] = 0x66, [2] = 0x151, [3] = 0xf3, [4] = 0x20,
[9997] = 0x66, [9998] = 0x151, [9999] = 0xf3, [10000] = 0x20,
} },
},
-- gmatch: one iteration per match of the pattern
{ name = 'gmatch: test string 1', func = mw.ustring.gmatch,
args = { str2, 'f%a+' },
expect = { { 'foo' }, { 'főó' }, { 'foó' }, { 'foooo' }, { 'foofoo' }, { 'fo' } },
type = 'Iterator'
},
{ name = 'gmatch: test string 2', func = mw.ustring.gmatch,
args = { str3, 'f%a+' },
expect = { { 'foo' }, { 'főó' }, { 'foó' }, { 'foooo' }, { 'foofoo' }, { 'fo' } },
type = 'Iterator'
},
-- The expected matches include the '^' itself: in gmatch a leading '^' is
-- matched as a literal character rather than acting as an anchor
{ name = 'gmatch: anchored', func = mw.ustring.gmatch,
args = { "fóó1 ^fóó2 fóó3 ^fóó4", '^fóó%d+' },
expect = { { "^fóó2" }, { "^fóó4" } },
type = 'Iterator'
},
-- An unterminated '[]' set is expected to raise the same message from find,
-- match and gsub. The test names describe this wording as the pure-Lua
-- implementation's message rather than a native one.
{ name = 'find: Pure-lua version, non-native error message', func = mw.ustring.find,
args = { "fóó", '[]' },
expect = "Missing close-bracket for character set beginning at pattern character 1"
},
{ name = 'match: Pure-lua version, non-native error message', func = mw.ustring.match,
args = { "fóó", '[]' },
expect = "Missing close-bracket for character set beginning at pattern character 1"
},
{ name = 'gsub: Pure-lua version, non-native error message', func = mw.ustring.gsub,
args = { "fóó", '[]', '' },
expect = "Missing close-bracket for character set beginning at pattern character 1"
},
-- A subject one byte longer than mw.ustring.maxStringLength is expected to
-- make gsub raise an argument error.
{ name = 'string length limit',
	func = function ()
		local oversized = string.rep( "x", mw.ustring.maxStringLength + 1 )
		local results = { mw.ustring.gsub( oversized, 'a', 'b' ) }
		-- So the output isn't insanely long
		local abbreviated = string.gsub( results[1], 'xxxxx(x*)', function ( rest )
			return 'xxxxx[snip ' .. #rest .. ' more]'
		end )
		results[1] = abbreviated
		return unpack( results )
	end,
	expect = "bad argument #1 to 'gsub' (string is longer than " .. mw.ustring.maxStringLength .. " bytes)"
},
-- Likewise for a pattern one byte longer than mw.ustring.maxPatternLength.
{ name = 'pattern length limit',
	func = function ()
		local oversizedPattern = string.rep( "x", mw.ustring.maxPatternLength + 1 )
		return mw.ustring.gsub( 'a', oversizedPattern, 'b' )
	end,
	expect = "bad argument #2 to 'gsub' (pattern is longer than " .. mw.ustring.maxPatternLength .. " bytes)"
},
} )