mediawiki-extensions-Scribunto/engines/LuaCommon/lualib/mw.text.lua
Brad Jorsch e5564cf942 Add mw.text.unstripNoWiki, mw.text.killMarkers, fix mw.text.unstrip
mw.text.unstrip is too broad, it's allowing for unstripping things that
cause problems when unstripped (e.g. bug 61268). Since the original
request was only for unstripping <nowiki>, let's add a function that
does only that.

We should also add an interface to StripState::killMarkers(), instead of
requiring everyone to roll their own work-alike.

Then, to fix the bug, we can make mw.text.unstrip be the combination of
the two. This is the most like the original behavior of mw.text.unstrip
(removes all strip markers, replacing them with text where applicable)
without causing issues.

Bug: 61268
Change-Id: I3a151fd678b365d629b71b4f1cb0d5d284b98555
2014-11-05 12:32:35 -05:00

288 lines
6.7 KiB
Lua

local mwtext = {}
local php
local options
local util = require 'libraryUtil'
local checkType = util.checkType
local checkTypeForNamedArg = util.checkTypeForNamedArg
function mwtext.setupInterface( opts )
-- Boilerplate
mwtext.setupInterface = nil
php = mw_interface
mw_interface = nil
options = opts
-- Register this library in the "mw" global
mw = mw or {}
mw.text = mwtext
package.loaded['mw.text'] = mwtext
end
function mwtext.trim( s, charset )
charset = charset or '\t\r\n\f '
s = mw.ustring.gsub( s, '^[' .. charset .. ']*(.-)[' .. charset .. ']*$', '%1' )
return s
end
local htmlencode_map = {
['>'] = '&gt;',
['<'] = '&lt;',
['&'] = '&amp;',
['"'] = '&quot;',
["'"] = '&#039;',
['\194\160'] = '&nbsp;',
}
local htmldecode_map = {}
for k, v in pairs( htmlencode_map ) do
htmldecode_map[v] = k
end
local decode_named_entities = nil
function mwtext.encode( s, charset )
charset = charset or '<>&"\'\194\160'
s = mw.ustring.gsub( s, '[' .. charset .. ']', function ( m )
if not htmlencode_map[m] then
local e = string.format( '&#%d;', mw.ustring.codepoint( m ) )
htmlencode_map[m] = e
htmldecode_map[e] = m
end
return htmlencode_map[m]
end )
return s
end
function mwtext.decode( s, decodeNamedEntities )
local dec
if decodeNamedEntities then
if decode_named_entities == nil then
decode_named_entities = php.getEntityTable()
setmetatable( decode_named_entities, { __index = htmldecode_map } )
end
dec = decode_named_entities
else
dec = htmldecode_map
end
-- string.gsub is safe here, because only ASCII chars are in the pattern
s = string.gsub( s, '(&(#?x?)([a-zA-Z0-9]+);)', function ( m, flg, name )
if not dec[m] then
local n = nil
if flg == '#' then
n = tonumber( name, 10 )
elseif flg == '#x' then
n = tonumber( name, 16 )
end
if n and n <= 0x10ffff then
n = mw.ustring.char( n )
if n then
htmldecode_map[m] = n
htmlencode_map[n] = m
end
end
end
return dec[m]
end )
return s
end
local nowikiRepl1 = {
['"'] = '&#34;',
['&'] = '&#38;',
["'"] = '&#39;',
['<'] = '&#60;',
['='] = '&#61;',
['>'] = '&#62;',
['['] = '&#91;',
[']'] = '&#93;',
['{'] = '&#123;',
['|'] = '&#124;',
['}'] = '&#125;',
}
local nowikiRepl2 = {
["\n#"] = "\n&#35;", ["\r#"] = "\r&#35;",
["\n*"] = "\n&#42;", ["\r*"] = "\r&#42;",
["\n:"] = "\n&#58;", ["\r:"] = "\r&#58;",
["\n;"] = "\n&#59;", ["\r;"] = "\r&#59;",
["\n "] = "\n&#32;", ["\r "] = "\r&#32;",
["\n\n"] = "\n&#10;", ["\r\n"] = "&#13;\n",
["\n\r"] = "\n&#13;", ["\r\r"] = "\r&#13;",
["\n\t"] = "\n&#9;", ["\r\t"] = "\r&#9;",
}
local nowikiReplMagic = {}
for sp, esc in pairs( {
[' '] = '&#32;',
['\t'] = '&#9;',
['\r'] = '&#13;',
['\n'] = '&#10;',
['\f'] = '&#12;',
} ) do
nowikiReplMagic['ISBN' .. sp] = 'ISBN' .. esc
nowikiReplMagic['RFC' .. sp] = 'RFC' .. esc
nowikiReplMagic['PMID' .. sp] = 'PMID' .. esc
end
function mwtext.nowiki( s )
-- string.gsub is safe here, because we're only caring about ASCII chars
s = string.gsub( s, '["&\'<=>%[%]{|}]', nowikiRepl1 )
s = '\n' .. s
s = string.gsub( s, '[\r\n][#*:; \n\r\t]', nowikiRepl2 )
s = string.gsub( s, '([\r\n])%-%-%-%-', '%1&#45;---' )
s = string.sub( s, 2 )
s = string.gsub( s, '__', '_&#95;' )
s = string.gsub( s, '://', '&#58;//' )
s = string.gsub( s, 'ISBN%s', nowikiReplMagic )
s = string.gsub( s, 'RFC%s', nowikiReplMagic )
s = string.gsub( s, 'PMID%s', nowikiReplMagic )
for k, v in pairs( options.nowiki_protocols ) do
s = string.gsub( s, k, v )
end
return s
end
function mwtext.tag( name, attrs, content )
local named = false
if type( name ) == 'table' then
named = true
name, attrs, content = name.name, name.attrs, name.content
checkTypeForNamedArg( 'tag', 'name', name, 'string' )
checkTypeForNamedArg( 'tag', 'attrs', attrs, 'table', true )
else
checkType( 'tag', 1, name, 'string' )
checkType( 'tag', 2, attrs, 'table', true )
end
local ret = { '<' .. name }
for k, v in pairs( attrs or {} ) do
if type( k ) ~= 'string' then
error( "bad named argument attrs to 'tag' (keys must be strings, found " .. type( k ) .. ")", 2 )
end
if string.match( k, '[\t\r\n\f /<>"\'=]' ) then
error( "bad named argument attrs to 'tag' (invalid key '" .. k .. "')", 2 )
end
local tp = type( v )
if tp == 'boolean' then
if v then
ret[#ret+1] = ' ' .. k
end
elseif tp == 'string' or tp == 'number' then
ret[#ret+1] = string.format( ' %s="%s"', k, mwtext.encode( tostring( v ) ) )
else
error( "bad named argument attrs to 'tag' (value for key '" .. k .. "' may not be " .. tp .. ")", 2 )
end
end
local tp = type( content )
if content == nil then
ret[#ret+1] = '>'
elseif content == false then
ret[#ret+1] = ' />'
elseif tp == 'string' or tp == 'number' then
ret[#ret+1] = '>'
ret[#ret+1] = content
ret[#ret+1] = '</' .. name .. '>'
else
if named then
checkTypeForNamedArg( 'tag', 'content', content, 'string, number, nil, or false' )
else
checkType( 'tag', 3, content, 'string, number, nil, or false' )
end
end
return table.concat( ret )
end
function mwtext.unstrip( s )
return php.unstrip( s )
end
function mwtext.unstripNoWiki( s )
return php.unstripNoWiki( s )
end
function mwtext.killMarkers( s )
return php.killMarkers( s )
end
function mwtext.split( text, pattern, plain )
local ret = {}
for m in mwtext.gsplit( text, pattern, plain ) do
ret[#ret+1] = m
end
return ret
end
function mwtext.gsplit( text, pattern, plain )
local s, l = 1, mw.ustring.len( text )
return function ()
if s then
local e, n = mw.ustring.find( text, pattern, s, plain )
local ret
if not e then
ret = mw.ustring.sub( text, s )
s = nil
elseif n < e then
-- Empty separator!
ret = mw.ustring.sub( text, s, e )
if e < l then
s = e + 1
else
s = nil
end
else
ret = e > s and mw.ustring.sub( text, s, e - 1 ) or ''
s = n + 1
end
return ret
end
end, nil, nil
end
function mwtext.listToText( list, separator, conjunction )
separator = separator or options.comma
conjunction = conjunction or options['and']
local n = #list
local ret
if n > 1 then
ret = table.concat( list, separator, 1, n - 1 ) .. conjunction .. list[n]
else
ret = tostring( list[1] or '' )
end
return ret
end
function mwtext.truncate( text, length, ellipsis, adjustLength )
local l = mw.ustring.len( text )
if l <= math.abs( length ) then
return text
end
ellipsis = ellipsis or options.ellipsis
local elen = 0
if adjustLength then
elen = mw.ustring.len( ellipsis )
end
local ret
if math.abs( length ) <= elen then
ret = ellipsis
elseif length > 0 then
ret = mw.ustring.sub( text, 1, length - elen ) .. ellipsis
else
ret = ellipsis .. mw.ustring.sub( text, length + elen )
end
if mw.ustring.len( ret ) < l then
return ret
else
return text
end
end
return mwtext