Synchronize mw.text.nowiki() with wfEscapeWikiText in core

Added escapes for "!" and ";" as well as additional escapes
at beginning and end of string.

Bug: T168763
Co-Authored-By: vlakoff <vlakoff@gmail.com>
Co-Authored-By: C. Scott Ananian <cananian@wikimedia.org>
Depends-On: I34f2fa8c329e6f6771453b2f94dc4afbec31dac8
Change-Id: I6c9dcfdbbb2c6eff9414e24d3f2693ebe576505a
This commit is contained in:
vlakoff 2017-11-20 19:48:57 +00:00 committed by jenkins-bot
parent bce8a82d66
commit 6c340bff8d
2 changed files with 109 additions and 4 deletions

View file

@ -100,19 +100,32 @@ local nowikiRepl1 = {
['{'] = '&#123;', ['{'] = '&#123;',
['|'] = '&#124;', ['|'] = '&#124;',
['}'] = '&#125;', ['}'] = '&#125;',
[';'] = '&#59;',
} }
local nowikiRepl2 = { local nowikiRepl2 = {
["\n!"] = "\n&#33;", ["\r!"] = "\r&#33;",
["\n#"] = "\n&#35;", ["\r#"] = "\r&#35;", ["\n#"] = "\n&#35;", ["\r#"] = "\r&#35;",
["\n*"] = "\n&#42;", ["\r*"] = "\r&#42;", ["\n*"] = "\n&#42;", ["\r*"] = "\r&#42;",
["\n:"] = "\n&#58;", ["\r:"] = "\r&#58;", ["\n:"] = "\n&#58;", ["\r:"] = "\r&#58;",
["\n;"] = "\n&#59;", ["\r;"] = "\r&#59;",
["\n "] = "\n&#32;", ["\r "] = "\r&#32;", ["\n "] = "\n&#32;", ["\r "] = "\r&#32;",
["\n\n"] = "\n&#10;", ["\r\n"] = "&#13;\n", ["\n\n"] = "\n&#10;", ["\r\n"] = "&#13;\n",
["\n\r"] = "\n&#13;", ["\r\r"] = "\r&#13;", ["\n\r"] = "\n&#13;", ["\r\r"] = "\r&#13;",
["\n\t"] = "\n&#9;", ["\r\t"] = "\r&#9;", ["\n\t"] = "\n&#9;", ["\r\t"] = "\r&#9;",
} }
-- Replacement table for the "protect first and last character" step of
-- mwtext.nowiki: maps a single character to its decimal numeric character
-- reference. The start-anchored gsub ('^[%-+_~]') only ever looks up
-- '-', '+', '_' and '~'; the end-anchored gsub ('[_~\r\n\t]$') only ever
-- looks up '_', '~', "\r", "\n" and "\t".
local nowikiRepl3 = {
['+'] = '&#43;',
['-'] = '&#45;',
['_'] = '&#95;',
['~'] = '&#126;',
["\n"] = "&#10;",
["\r"] = "&#13;",
["\t"] = "&#9;",
}
-- Alias, not a copy: both names refer to the same table. nowikiRepl3 is
-- passed to the start-of-string gsub and nowikiRepl4 to the end-of-string one.
local nowikiRepl4 = nowikiRepl3
local nowikiReplMagic = {} local nowikiReplMagic = {}
for sp, esc in pairs( { for sp, esc in pairs( {
[' '] = '&#32;', [' '] = '&#32;',
@ -128,13 +141,20 @@ end
function mwtext.nowiki( s ) function mwtext.nowiki( s )
-- string.gsub is safe here, because we're only caring about ASCII chars -- string.gsub is safe here, because we're only caring about ASCII chars
s = string.gsub( s, '["&\'<=>%[%]{|}]', nowikiRepl1 ) s = string.gsub( s, '["&\'<=>%[%]{|};]', nowikiRepl1 )
s = '\n' .. s s = '\n' .. s
s = string.gsub( s, '[\r\n][#*:; \n\r\t]', nowikiRepl2 ) s = string.gsub( s, '[\r\n][!#*: \n\r\t]', nowikiRepl2 )
s = string.gsub( s, '([\r\n])%-%-%-%-', '%1&#45;---' ) s = string.gsub( s, '([\r\n])%-%-%-%-', '%1&#45;---' )
s = string.sub( s, 2 ) s = string.sub( s, 2 )
s = string.gsub( s, '!!', '&#33;!' )
s = string.gsub( s, '__', '_&#95;' ) s = string.gsub( s, '__', '_&#95;' )
s = string.gsub( s, '://', '&#58;//' ) s = string.gsub( s, '://', '&#58;//' )
s = string.gsub( s, '~~~', '~~&#126;' )
-- protect first and last character
s = string.gsub( s, '^[%-+_~]', nowikiRepl3 )
s = string.gsub( s, '[_~\r\n\t]$', nowikiRepl4 )
-- technically, should only do these if $wgEnableMagicLinks, but
-- it doesn't hurt to be safe
s = string.gsub( s, 'ISBN%s', nowikiReplMagic ) s = string.gsub( s, 'ISBN%s', nowikiReplMagic )
s = string.gsub( s, 'RFC%s', nowikiReplMagic ) s = string.gsub( s, 'RFC%s', nowikiReplMagic )
s = string.gsub( s, 'PMID%s', nowikiReplMagic ) s = string.gsub( s, 'PMID%s', nowikiReplMagic )

View file

@ -82,12 +82,97 @@ local tests = {
func = mw.text.nowiki, func = mw.text.nowiki,
args = { '*"&\'<=>[]{|}#*:;\n*\n#\n:\n;\nhttp://example.com:80/\nRFC 123, ISBN 456' }, args = { '*"&\'<=>[]{|}#*:;\n*\n#\n:\n;\nhttp://example.com:80/\nRFC 123, ISBN 456' },
expect = { expect = {
'&#42;&#34;&#38;&#39;&#60;&#61;&#62;&#91;&#93;&#123;&#124;&#125;#*:;' .. '&#42;&#34;&#38;&#39;&#60;&#61;&#62;&#91;&#93;&#123;&#124;&#125;#*:&#59;' ..
'\n&#42;\n&#35;\n&#58;\n&#59;\nhttp&#58;//example.com:80/' .. '\n&#42;\n&#35;\n&#58;\n&#59;\nhttp&#58;//example.com:80/' ..
'\nRFC&#32;123, ISBN&#32;456' '\nRFC&#32;123, ISBN&#32;456'
} }
}, },
-- nowiki test cases taken from wfEscapeWikiText test cases in core
{ name = 'nowiki noescapes',
func = mw.text.nowiki,
args = { 'a' },
expect = {
'a'
}
},
{ name = 'nowiki braces and brackets',
func = mw.text.nowiki,
args = { '[[WikiLink]] {{Template}} <html>' },
expect = {
'&#91;&#91;WikiLink&#93;&#93; &#123;&#123;Template&#125;&#125; &#60;html&#62;'
}
},
{ name = 'nowiki quotes',
func = mw.text.nowiki,
args = { '"' .. "'" },
expect = { '&#34;&#39;' },
},
{ name = 'nowiki tokens',
func = mw.text.nowiki,
args = { '{| {- {+ !! ~~~~~ __FOO__' },
expect = {
'&#123;&#124; &#123;- &#123;+ &#33;! ~~&#126;~~ _&#95;FOO_&#95;'
},
},
{ name = 'nowiki start of line',
func = mw.text.nowiki,
args = { '* foo\n! bar\n# bat\n:baz\n pre\n----' },
expect = {
'&#42; foo\n&#33; bar\n&#35; bat\n&#58;baz\n&#32;pre\n&#45;---',
},
},
{ name = 'nowiki paragraph separators',
func = mw.text.nowiki,
args = { 'a\n\n\n\nb' },
expect = { 'a\n&#10;\n&#10;b' },
},
{ name = 'nowiki language converter',
func = mw.text.nowiki,
args = { '-{ foo ; bar }-' },
expect = { '&#45;&#123; foo &#59; bar &#125;-' },
},
{ name = 'nowiki left-side context: |+',
func = mw.text.nowiki,
args = { '+ foo + bar' },
expect = { '&#43; foo + bar' },
},
{ name = 'nowiki left-side context: |-',
func = mw.text.nowiki,
args = { '- foo - bar' },
expect = { '&#45; foo - bar' },
},
{ name = 'nowiki left-side context: __FOO__',
func = mw.text.nowiki,
args = { '_FOO__' },
expect = { '&#95;FOO_&#95;' },
},
{ name = 'nowiki left-side context: ~~~',
func = mw.text.nowiki,
args = { '~~ long string here' },
expect = { '&#126;~ long string here' },
},
{ name = 'nowiki left-side context: newlines',
func = mw.text.nowiki,
args = { '\n\n\nFoo' },
expect = { '&#10;\n&#10;Foo' },
},
{ name = 'nowiki right-side context: ~~~',
func = mw.text.nowiki,
args = { 'long string here ~~' },
expect = { 'long string here ~&#126;' },
},
{ name = 'nowiki right-side context: __FOO__',
func = mw.text.nowiki,
args = { '__FOO_' },
expect = { '&#95;&#95;FOO&#95;' },
},
{ name = 'nowiki right-side context: newlines',
func = mw.text.nowiki,
args = { 'foo\n\n\n' },
expect = { 'foo\n&#10;&#10;' },
},
{ name = 'tag, simple', { name = 'tag, simple',
func = mw.text.tag, func = mw.text.tag,
args = { { name = 'b' } }, args = { { name = 'b' } },