mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Scribunto
synced 2024-11-15 03:35:29 +00:00
0367e9bddd
The pure-Lua ustring pattern matching functions short-circuit to the much faster string library when the pattern would match the same against the raw bytes. A pattern like "[^a-z]" can match a partial UTF-8 character when applied bytewise, and so must be detected as unsafe. Let's also directly test the pure-Lua module, instead of me having to comment out lines in Scribunto_LuaUstringLibrary::register() whenever I want to test them. Change-Id: I91ed3374aadfea379b9db2e13b4248ab20df509e
586 lines
18 KiB
Lua
586 lines
18 KiB
Lua
-- Test framework module; the table of cases returned below is handed to its
-- getTestProvider function.
local testframework = require 'Module:TestFramework'

-- Eight characters sitting on the edges of the 1-, 2-, 3- and 4-byte UTF-8
-- ranges: U+0000, U+007F, U+0080, U+07FF, U+0800, U+FFFF, U+10000, U+10FFFF
-- (the 'codepoint: whole string' case below spells out the same list).
local str1 = "\0\127\194\128\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191"
-- Words starting with "f" in ASCII and non-ASCII variants, separated by
-- words that do not; used mainly by the gsub and gmatch cases.
local str2 = "foo bar főó foó baz foooo foofoo fo"
-- The same text as str2 with extra leading and trailing words around it.
local str3 = "??? foo bar főó foó baz foooo foofoo fo ok?"

return testframework.getTestProvider( {
-- isutf8: one well-formed string, then one case per kind of malformed input.
{ name = 'isutf8: valid string', func = mw.ustring.isutf8,
	args = { "\0 \127 \194\128 \223\191 \224\160\128 \239\191\191 \240\144\128\128 \244\143\191\191" },
	expect = { true }
},
-- \244\144\128\128 would decode to U+110000, one past the last code point.
{ name = 'isutf8: out of range character', func = mw.ustring.isutf8,
	args = { "\244\144\128\128" },
	expect = { false }
},
-- Lead byte \240 announces a 4-byte sequence; only two bytes follow it.
{ name = 'isutf8: insufficient continuation bytes', func = mw.ustring.isutf8,
	args = { "\240\128\128" },
	expect = { false }
},
-- \194\128 is already a complete character; the second \128 is stray.
{ name = 'isutf8: excess continuation bytes', func = mw.ustring.isutf8,
	args = { "\194\128\128" },
	expect = { false }
},
{ name = 'isutf8: bare continuation byte', func = mw.ustring.isutf8,
	args = { "\128" },
	expect = { false }
},
-- Two-byte spellings of U+0000 and U+007F, which must use a single byte.
{ name = 'isutf8: overlong encoding', func = mw.ustring.isutf8,
	args = { "\192\128" },
	expect = { false }
},
{ name = 'isutf8: overlong encoding (2)', func = mw.ustring.isutf8,
	args = { "\193\191" },
	expect = { false }
},

-- byteoffset: the numbers in each name are the arguments after the string.
-- "fóo" has characters starting at bytes 1, 2 and 4 (ó takes two bytes);
-- "foó" has them at bytes 1, 2 and 3.
-- NOTE(review): argument meaning (character offset l, then byte position i)
-- is taken from the Scribunto reference manual, not from this file.
{ name = 'byteoffset: (1)', func = mw.ustring.byteoffset,
	args = { "fóo", 1 },
	expect = { 1 }
},
{ name = 'byteoffset: (2)', func = mw.ustring.byteoffset,
	args = { "fóo", 2 },
	expect = { 2 }
},
{ name = 'byteoffset: (3)', func = mw.ustring.byteoffset,
	args = { "fóo", 3 },
	expect = { 4 }
},
{ name = 'byteoffset: (4)', func = mw.ustring.byteoffset,
	args = { "fóo", 4 },
	expect = { nil }
},
{ name = 'byteoffset: (0,1)', func = mw.ustring.byteoffset,
	args = { "fóo", 0, 1 },
	expect = { 1 }
},
{ name = 'byteoffset: (0,2)', func = mw.ustring.byteoffset,
	args = { "fóo", 0, 2 },
	expect = { 2 }
},
-- Byte 3 is the second byte of ó, so the answer is where ó starts.
{ name = 'byteoffset: (0,3)', func = mw.ustring.byteoffset,
	args = { "fóo", 0, 3 },
	expect = { 2 }
},
{ name = 'byteoffset: (0,4)', func = mw.ustring.byteoffset,
	args = { "fóo", 0, 4 },
	expect = { 4 }
},
{ name = 'byteoffset: (0,5)', func = mw.ustring.byteoffset,
	args = { "fóo", 0, 5 },
	expect = { nil }
},
-- Negative byte positions: the last byte is a whole character in "fóo"
-- but the tail of ó in "foó".
{ name = 'byteoffset: (0,-1)', func = mw.ustring.byteoffset,
	args = { "fóo", 0, -1 },
	expect = { 4 }
},
{ name = 'byteoffset: (0,-1) (2)', func = mw.ustring.byteoffset,
	args = { "foó", 0, -1 },
	expect = { 3 }
},
{ name = 'byteoffset: (1,-1)', func = mw.ustring.byteoffset,
	args = { "fóo", 1, -1 },
	expect = { 4 }
},
{ name = 'byteoffset: (1,-1) (2)', func = mw.ustring.byteoffset,
	args = { "foó", 1, -1 },
	expect = { nil }
},

-- codepoint: indexes count characters of str1 (8 of them), not bytes.
{ name = 'codepoint: whole string', func = mw.ustring.codepoint,
	args = { str1, 1, -1 },
	expect = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff }
},
{ name = 'codepoint: substring', func = mw.ustring.codepoint,
	args = { str1, 5, -2 },
	expect = { 0x800, 0xffff, 0x10000 }
},
-- Empty ranges: end before start, end of zero, and start past the string.
{ name = 'codepoint: (5,4)', func = mw.ustring.codepoint,
	args = { str1, 5, 4 },
	expect = {}
},
{ name = 'codepoint: (1,0)', func = mw.ustring.codepoint,
	args = { str1, 1, 0 },
	expect = {}
},
{ name = 'codepoint: (9,9)', func = mw.ustring.codepoint,
	args = { str1, 9, 9 },
	expect = {}
},

-- char: the inverse of the 'codepoint: whole string' case, plus bad input.
-- NOTE(review): a string-valued `expect` looks like the expected error
-- message rather than a return value; confirm against Module:TestFramework.
{ name = 'char: basic test', func = mw.ustring.char,
	args = { 0, 0x7f, 0x80, 0x7ff, 0x800, 0xffff, 0x10000, 0x10ffff },
	expect = { str1 }
},
{ name = 'char: invalid codepoint', func = mw.ustring.char,
	args = { 0x110000 },
	expect = "bad argument #1 to 'char' (value out of range)"
},
{ name = 'char: invalid value', func = mw.ustring.char,
	args = { 'foo' },
	expect = "bad argument #1 to 'char' (number expected, got string)"
},

-- len: counts characters; the malformed input is the same out-of-range
-- sequence used by the isutf8 cases and yields nil instead of a number.
{ name = 'len: basic test', func = mw.ustring.len,
	args = { str1 },
	expect = { 8 }
},
{ name = 'len: invalid string', func = mw.ustring.len,
	args = { "\244\144\128\128" },
	expect = { nil }
},

-- sub: the numbers in each name are the (start, end) character indexes.
{ name = 'sub: (4)', func = mw.ustring.sub,
	args = { str1, 4 },
	expect = { "\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191" }
},
{ name = 'sub: (4,7)', func = mw.ustring.sub,
	args = { str1, 4, 7 },
	expect = { "\223\191\224\160\128\239\191\191\240\144\128\128" }
},
{ name = 'sub: (4,-1)', func = mw.ustring.sub,
	args = { str1, 4, -1 },
	expect = { "\223\191\224\160\128\239\191\191\240\144\128\128\244\143\191\191" }
},
{ name = 'sub: (4,-2)', func = mw.ustring.sub,
	args = { str1, 4, -2 },
	expect = { "\223\191\224\160\128\239\191\191\240\144\128\128" }
},
{ name = 'sub: (-2)', func = mw.ustring.sub,
	args = { str1, -2 },
	expect = { "\240\144\128\128\244\143\191\191" }
},
-- Edge cases: start past the end, start of zero, and empty ranges.
{ name = 'sub: (9)', func = mw.ustring.sub,
	args = { str1, 9 },
	expect = { "" }
},
{ name = 'sub: (0)', func = mw.ustring.sub,
	args = { str1, 0 },
	expect = { str1 }
},
{ name = 'sub: (4,3)', func = mw.ustring.sub,
	args = { str1, 4, 3 },
	expect = { "" }
},
{ name = 'sub: (1,0)', func = mw.ustring.sub,
	args = { str2, 1, 0 },
	expect = { "" }
},
{ name = 'sub: (5,5)', func = mw.ustring.sub,
	args = { str1, 5, 5 },
	expect = { "\224\160\128" }
},
{ name = 'sub: (9,9)', func = mw.ustring.sub,
	args = { str1, 9, 9 },
	expect = { "" }
},
{ name = 'sub: empty string', func = mw.ustring.sub,
	args = { '', 5 },
	expect = { "" }
},

-- upper / lower: the non-ASCII ó changes case too; "?" is left unchanged.
{ name = 'upper: basic test', func = mw.ustring.upper,
	args = { "fóó?" },
	expect = { "FÓÓ?" }
},
{ name = 'lower: basic test', func = mw.ustring.lower,
	args = { "FÓÓ?" },
	expect = { "fóó?" }
},

-- find, literals and quantifiers. Expected positions are character indexes:
-- the two-byte ¡ at position 5 occupies a single position.
{ name = 'find: (simple)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡foo' },
	expect = { 5, 8 }
},
-- %+ is an escaped literal plus sign, so it only matches a real "+".
{ name = 'find: (%)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡fo%+' },
	expect = {}
},
{ name = 'find: (%) (2)', func = mw.ustring.find,
	args = { "bar ¡fo+ bar", '¡fo%+' },
	expect = { 5, 8 }
},
{ name = 'find: (+)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡fo+' },
	expect = { 5, 8 }
},
{ name = 'find: (+) (2)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡fx+o+' },
	expect = {}
},
{ name = 'find: (?)', func = mw.ustring.find,
	args = { "bar ¡foox bar", '¡foox?' },
	expect = { 5, 9 }
},
{ name = 'find: (?) (2)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡foox?' },
	expect = { 5, 8 }
},
{ name = 'find: (*)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡fx*oo' },
	expect = { 5, 8 }
},
-- "-" takes as few repetitions as the rest of the pattern allows.
{ name = 'find: (-)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡fo-' },
	expect = { 5, 6 }
},
{ name = 'find: (-) (2)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡fo-o' },
	expect = { 5, 7 }
},
{ name = 'find: (-) (3)', func = mw.ustring.find,
	args = { "bar ¡foox bar", '¡fo-x' },
	expect = { 5, 9 }
},
-- find, %a and bracketed sets: non-ASCII letters must count as letters and
-- be usable as set members and range endpoints.
{ name = 'find: (%a)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '¡f%a' },
	expect = { 5, 7 }
},
{ name = 'find: (%a, utf8)', func = mw.ustring.find,
	args = { "bar ¡fóó bar", '¡f%a' },
	expect = { 5, 7 }
},
{ name = 'find: (%a, utf8 2)', func = mw.ustring.find,
	args = { "bar ¡fóó bar", 'f%a' },
	expect = { 6, 7 }
},
{ name = 'find: (%a+)', func = mw.ustring.find,
	args = { "bar ¡fóó bar", '¡f%a+' },
	expect = { 5, 8 }
},
{ name = 'find: ([]+)', func = mw.ustring.find,
	args = { "bar ¡fóo bar", '¡f[oó]+' },
	expect = { 5, 8 }
},
{ name = 'find: ([-]+)', func = mw.ustring.find,
	args = { "bar ¡fóo bar", '¡f[a-uá-ú]+' },
	expect = { 5, 8 }
},
-- A single range running from an ASCII letter to a non-ASCII one.
{ name = 'find: ([-]+ 2)', func = mw.ustring.find,
	args = { "bar ¡fóo bar", '¡f[a-ú]+' },
	expect = { 5, 8 }
},
-- find, %b (balanced pair): ASCII delimiters, non-ASCII delimiters, a mix
-- of both, and the same character used as opener and closer.
{ name = 'find: (%b)', func = mw.ustring.find,
	args = { "bar ¡<foo <foo> foo> bar", '¡%b<>' },
	expect = { 5, 20 }
},
{ name = 'find: (%b 2)', func = mw.ustring.find,
	args = { "bar ¡(foo (foo) foo) bar", '¡%b()' },
	expect = { 5, 20 }
},
-- With identical delimiters the match ends at the very next occurrence.
{ name = 'find: (%b 3)', func = mw.ustring.find,
	args = { "bar ¡-foo-foo- bar", '¡%b--' },
	expect = { 5, 10 }
},
{ name = 'find: (%b 4)', func = mw.ustring.find,
	args = { "bar «foo «foo» foo» bar", '%b«»' },
	expect = { 5, 19 }
},
{ name = 'find: (%b 5)', func = mw.ustring.find,
	args = { "bar !foo !foo¡ foo¡ bar", '%b!¡' },
	expect = { 5, 19 }
},
{ name = 'find: (%b 6)', func = mw.ustring.find,
	args = { "bar ¡foo ¡foo! foo! bar", '%b¡!' },
	expect = { 5, 19 }
},
{ name = 'find: (%b 7)', func = mw.ustring.find,
	args = { "bar ¡foo¡foo¡ bar", '%b¡¡' },
	expect = { 5, 9 }
},
-- find, %f (frontier). A frontier matches an empty span, which find reports
-- as an end position one less than the start position.
{ name = 'find: (%f)', func = mw.ustring.find,
	args = { "foo ¡foobar ¡foo bar baz", '¡.-%f[%s]' },
	expect = { 5, 11 }
},
{ name = 'find: (%f 2)', func = mw.ustring.find,
	args = { "foo ¡foobar ¡foo bar baz", '¡foo%f[%s]' },
	expect = { 13, 16 }
},
-- The first ¡ (position 8) directly follows a letter, so %f[%S] skips it.
{ name = 'find: (%f 3)', func = mw.ustring.find,
	args = { "foo foo¡foobar ¡foo bar baz", '%f[%S]¡.-%f[%s]' },
	expect = { 16, 19 }
},
{ name = 'find: (%f 4)', func = mw.ustring.find,
	args = { "foo foo¡foobar ¡foo bar baz", '%f[%S]¡.-%f[%s]', 16 },
	expect = { 16, 19 }
},
-- The expected positions below show the start and end of the subject
-- acting as if a \0 sat just outside the string.
{ name = 'find: (%f 5)', func = mw.ustring.find,
	args = { "foo ¡bar baz", '%f[%Z]' },
	expect = { 1, 0 }
},
{ name = 'find: (%f 6)', func = mw.ustring.find,
	args = { "foo ¡bar baz", '%f[%z]' },
	expect = { 13, 12 }
},
{ name = 'find: (%f 7)', func = mw.ustring.find,
	args = { "foo ¡b\0r baz", '%f[%Z]', 2 },
	expect = { 8, 7 }
},
{ name = 'find: (%f 8)', func = mw.ustring.find,
	args = { "\0foo ¡b\0r baz", '%f[%z]' },
	expect = { 8, 7 }
},
{ name = 'find: (%f 9)', func = mw.ustring.find,
	args = { "\0foo ¡b\0r baz", '%f[%Z]' },
	expect = { 2, 1 }
},
-- find, complemented classes and sets: the non-ASCII ó must not be treated
-- as a non-letter, so each match starts at the "?" in position 4.
{ name = 'find: (%A)', func = mw.ustring.find,
	args = { "fóó? bar", '%A+' },
	expect = { 4, 5 }
},
{ name = 'find: (%W)', func = mw.ustring.find,
	args = { "fóó? bar", '%W+' },
	expect = { 4, 5 }
},
{ name = 'find: ([^])', func = mw.ustring.find,
	args = { "fóó? bar", '[^a-zó]+' },
	expect = { 4, 5 }
},
{ name = 'find: ([^] 2)', func = mw.ustring.find,
	args = { "fó0? bar", '[^%a0-9]+' },
	expect = { 4, 5 }
},
-- %% inside the set is a literal percent sign.
{ name = 'find: ([^] 3)', func = mw.ustring.find,
	args = { "¡fó0% bar", '¡[^%%]+' },
	expect = { 1, 4 }
},
-- End anchor, then greedy versus lazy repetition of ".".
{ name = 'find: ($)', func = mw.ustring.find,
	args = { "¡foo1 ¡foo2 ¡foo3", '¡foo[0-9]+$' },
	expect = { 13, 17 }
},
{ name = 'find: (.*)', func = mw.ustring.find,
	args = { "¡foo¡ ¡bar¡ baz", '¡.*¡' },
	expect = { 1, 11 }
},
{ name = 'find: (.-)', func = mw.ustring.find,
	args = { "¡foo¡ ¡bar¡ baz", '¡.-¡' },
	expect = { 1, 5 }
},
-- find, plain mode, empty patterns and out-of-range init values.
-- In plain mode the "." is a literal dot, so "¡a¡" is not a match.
{ name = 'find: plain', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '¡.¡', 1, true },
	expect = { 5, 7 }
},
-- An empty pattern matches an empty span at the starting position.
{ name = 'find: empty delimiter', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '' },
	expect = { 1, 0 }
},
{ name = 'find: empty delimiter (2)', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '', 2 },
	expect = { 2, 1 }
},
{ name = 'find: plain + empty delimiter', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '', 1, true },
	expect = { 1, 0 }
},
{ name = 'find: plain + empty delimiter (2)', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '', 2, true },
	expect = { 2, 1 }
},
-- The subject has 7 characters: an init of 20 behaves like 8 (just past
-- the end) and an init of -20 behaves like 1.
{ name = 'find: excessive init', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '()', 20 },
	expect = { 8, 7, 8 }
},
{ name = 'find: excessive init (2)', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '()', -20 },
	expect = { 1, 0, 1 }
},
{ name = 'find: plain + excessive init', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '', 20, true },
	expect = { 8, 7 }
},
{ name = 'find: plain + excessive init (2)', func = mw.ustring.find,
	args = { "¡a¡ ¡.¡", '', -20, true },
	expect = { 1, 0 }
},

-- find with captures: results are start, end, then each capture in order
-- of its opening parenthesis. "()" captures a character position.
{ name = 'find: capture (1)', func = mw.ustring.find,
	args = { "bar ¡foo bar", '(¡foo)' },
	expect = { 5, 8, '¡foo' }
},
{ name = 'find: capture (2)', func = mw.ustring.find,
	args = { "bar ¡fóo bar", '(¡f%a+)' },
	expect = { 5, 8, '¡fóo' }
},
{ name = 'find: capture (3)', func = mw.ustring.find,
	args = { "bar ¡fóo bar", '(¡f(%a)%a)' },
	expect = { 5, 8, '¡fóo', 'ó' }
},
-- The lazy inner capture matches nothing, so it comes back as ''.
{ name = 'find: capture (4)', func = mw.ustring.find,
	args = { "bar ¡fóo bar", '(¡f(%a-)%a)' },
	expect = { 5, 7, '¡fó', '' }
},
{ name = 'find: capture (5)', func = mw.ustring.find,
	args = { "bar ¡fóo bar", '()(()¡f()(%a)()%a())()' },
	expect = { 5, 8, 5, '¡fóo', 5, 7, 'ó', 8, 9, 9 }
},
{ name = 'find: capture (6)', func = mw.ustring.find,
	args = { "fóó", "()(f)()(óó)()" },
	expect = { 1, 3, 1, 'f', 2, 'óó', 4 }
},
{ name = 'find: capture (7)', func = mw.ustring.find,
	args = { "fóó fóó", "()(f)()(óó)()", 2 },
	expect = { 5, 7, 5, 'f', 6, 'óó', 8 }
},
-- %1 back-reference: the first word that is immediately repeated.
{ name = 'find: (%1)', func = mw.ustring.find,
	args = { "foo foofóó foófoó bar", '(f%a+)%1' },
	expect = { 12, 17, 'foó' }
},
-- A negated ASCII-only set looks safe to run on raw bytes, but bytewise it
-- could match half of ó; the whole character must come back.
{ name = 'find: deceptively-simple pattern', func = mw.ustring.find,
	args = { "fóó", '([^a-z])' },
	expect = { 2, 2, 'ó' }
},

-- match: returns the whole match when the pattern has no captures, and the
-- captures otherwise. The init cases mirror the find ones.
{ name = 'match: (1)', func = mw.ustring.match,
	args = { "bar fóo bar", 'f%a+' },
	expect = { 'fóo' }
},
{ name = 'match: (2)', func = mw.ustring.match,
	args = { "bar fóo bar", 'f(%a+)' },
	expect = { 'óo' }
},
{ name = 'match: empty pattern', func = mw.ustring.match,
	args = { "¡a¡ ¡.¡", '()' },
	expect = { 1 }
},
{ name = 'match: empty pattern (2)', func = mw.ustring.match,
	args = { "¡a¡ ¡.¡", '()', 2 },
	expect = { 2 }
},
{ name = 'match: excessive init', func = mw.ustring.match,
	args = { "¡a¡ ¡.¡", '()', 20 },
	expect = { 8 }
},
{ name = 'match: excessive init (2)', func = mw.ustring.match,
	args = { "¡a¡ ¡.¡", '()', -20 },
	expect = { 1 }
},

-- gsub: string, table and function replacements. The second expected value
-- is the number of matches, including matches whose text was left as is.
{ name = 'gsub: (string 1)', func = mw.ustring.gsub,
	args = { str2, 'f%a+', 'X' },
	expect = { 'X bar X X baz X X X', 6 }
},
{ name = 'gsub: (string 2)', func = mw.ustring.gsub,
	args = { str3, 'f%a+', 'X' },
	expect = { '??? X bar X X baz X X X ok?', 6 }
},
-- A fourth argument caps the number of replacements.
{ name = 'gsub: (string 3)', func = mw.ustring.gsub,
	args = { str2, 'f%a+', 'X', 3 },
	expect = { 'X bar X X baz foooo foofoo fo', 3 }
},
{ name = 'gsub: (string 4)', func = mw.ustring.gsub,
	args = { str3, 'f%a+', 'X', 3 },
	expect = { '??? X bar X X baz foooo foofoo fo ok?', 3 }
},
-- %% is a literal percent sign; %0 is the whole match, %1 and %2 captures.
{ name = 'gsub: (string 5)', func = mw.ustring.gsub,
	args = { 'foo; fóó', '(f)(%a+)', '%%0=%0 %%1=%1 %%2=%2' },
	expect = { '%0=foo %1=f %2=oo; %0=fóó %1=f %2=óó', 2 }
},
{ name = 'gsub: (anchored)', func = mw.ustring.gsub,
	args = { 'foofoofoo foo', '^foo', 'X' },
	expect = { 'Xfoofoo foo', 1 }
},
-- Table replacement: matches without a table entry stay unchanged but are
-- still counted.
{ name = 'gsub: (table 1)', func = mw.ustring.gsub,
	args = { str2, 'f%a+', { foo = 'X', ['főó'] = 'Y', ['foó'] = 'Z' } },
	expect = { 'X bar Y Z baz foooo foofoo fo', 6 }
},
{ name = 'gsub: (table 2)', func = mw.ustring.gsub,
	args = { str3, 'f%a+', { foo = 'X', ['főó'] = 'Y', ['foó'] = 'Z' } },
	expect = { '??? X bar Y Z baz foooo foofoo fo ok?', 6 }
},
-- The single allowed match ("foo") has no table entry, so nothing changes.
{ name = 'gsub: (table 3)', func = mw.ustring.gsub,
	args = { str2, 'f%a+', { ['főó'] = 'Y', ['foó'] = 'Z' }, 1 },
	expect = { str2, 1 }
},
-- Function replacement: returning nil keeps the matched text.
{ name = 'gsub: (function 1)', func = mw.ustring.gsub,
	args = { str2, 'f%a+', function ( m )
		if m == 'fo' then
			return nil
		end
		return '-' .. mw.ustring.upper( m ) .. '-'
	end },
	expect = { '-FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo', 6 }
},
{ name = 'gsub: (function 2)', func = mw.ustring.gsub,
	args = { str3, 'f%a+', function ( m )
		if m == 'fo' then
			return nil
		end
		return '-' .. mw.ustring.upper( m ) .. '-'
	end },
	expect = { '??? -FOO- bar -FŐÓ- -FOÓ- baz -FOOOO- -FOOFOO- fo ok?', 6 }
},
-- The pattern has one capture, so %2 in the replacement is an error.
{ name = 'gsub: invalid replacement string', func = mw.ustring.gsub,
	args = { 'foo; fóó', '(%a+)', '%2' },
	expect = "invalid capture index %2 in replacement string"
},
-- Numbers are accepted in place of the subject, pattern or replacement.
{ name = 'gsub: passing numbers instead of strings (1)', func = mw.ustring.gsub,
	args = { 12345, '[33]', '9' },
	expect = { '12945', 1 }
},
{ name = 'gsub: passing numbers instead of strings (2)', func = mw.ustring.gsub,
	args = { '12345', 3, '9' },
	expect = { '12945', 1 }
},
{ name = 'gsub: passing numbers instead of strings (3)', func = mw.ustring.gsub,
	args = { '12345', '[33]', 9 },
	expect = { '12945', 1 }
},

-- gcodepoint: iterator form of codepoint. With type = 'Iterator', `expect`
-- lists one table of values per iteration step.
{ name = 'gcodepoint: basic test', func = mw.ustring.gcodepoint,
	args = { str1 },
	expect = { { 0 }, { 0x7f }, { 0x80 }, { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 }, { 0x10ffff } },
	type = 'Iterator'
},
{ name = 'gcodepoint: (4)', func = mw.ustring.gcodepoint,
	args = { str1, 4 },
	expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 }, { 0x10ffff } },
	type = 'Iterator'
},
{ name = 'gcodepoint: (4, -2)', func = mw.ustring.gcodepoint,
	args = { str1, 4, -2 },
	expect = { { 0x7ff }, { 0x800 }, { 0xffff }, { 0x10000 } },
	type = 'Iterator'
},
-- Empty ranges produce an iterator that yields nothing.
{ name = 'gcodepoint: (4, 3)', func = mw.ustring.gcodepoint,
	args = { str1, 4, 3 },
	expect = {},
	type = 'Iterator'
},
{ name = 'gcodepoint: (1, 0)', func = mw.ustring.gcodepoint,
	args = { str1, 1, 0 },
	expect = {},
	type = 'Iterator'
},
{ name = 'gcodepoint: (9, 9)', func = mw.ustring.gcodepoint,
	args = { str1, 9, 9 },
	expect = {},
	type = 'Iterator'
},

-- gmatch: iterates over every match; `expect` lists one table per step.
{ name = 'gmatch: test string 1', func = mw.ustring.gmatch,
	args = { str2, 'f%a+' },
	expect = { { 'foo' }, { 'főó' }, { 'foó' }, { 'foooo' }, { 'foofoo' }, { 'fo' } },
	type = 'Iterator'
},
{ name = 'gmatch: test string 2', func = mw.ustring.gmatch,
	args = { str3, 'f%a+' },
	expect = { { 'foo' }, { 'főó' }, { 'foó' }, { 'foooo' }, { 'foofoo' }, { 'fo' } },
	type = 'Iterator'
},
-- In gmatch a leading "^" is expected to match a literal caret rather than
-- anchor the pattern, hence the two "^fóó…" results.
{ name = 'gmatch: anchored', func = mw.ustring.gmatch,
	args = { "fóó1 ^fóó2 fóó3 ^fóó4", '^fóó%d+' },
	expect = { { "^fóó2" }, { "^fóó4" } },
	type = 'Iterator'
},

-- Length limits: a subject or pattern one byte over the configured maximum
-- must be rejected with an argument error.
{ name = 'string length limit',
	func = function ()
		local s = string.rep( "x", mw.ustring.maxStringLength + 1 )
		local ret = { mw.ustring.gsub( s, 'a', 'b' ) }
		-- Only reached if gsub did not raise: collapse the long run of "x"
		-- so the output isn't insanely long.
		ret[1] = string.gsub( ret[1], 'xxxxx(x*)', function ( m )
			return 'xxxxx[snip ' .. #m .. ' more]'
		end )
		return unpack( ret )
	end,
	expect = "bad argument #1 to 'gsub' (string is longer than " .. mw.ustring.maxStringLength .. " bytes)"
},
{ name = 'pattern length limit',
	func = function ()
		local pattern = string.rep( "x", mw.ustring.maxPatternLength + 1 )
		return mw.ustring.gsub( 'a', pattern, 'b' )
	end,
	expect = "bad argument #2 to 'gsub' (pattern is longer than " .. mw.ustring.maxPatternLength .. " bytes)"
},
} )