mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/Scribunto
synced 2024-11-30 02:54:26 +00:00
258 lines
5.9 KiB
Lua
258 lines
5.9 KiB
Lua
|
-- Module table; published as mw.text by mwtext.setupInterface().
local mwtext = {}
-- Both are assigned by mwtext.setupInterface(): `php` receives the former
-- `mw_interface` global and `options` receives the table passed in.
local php, options

-- Argument type checkers shared with the other Scribunto libraries.
local util = require( 'libraryUtil' )
local checkType, checkTypeForNamedArg = util.checkType, util.checkTypeForNamedArg
|
||
|
|
||
|
--- One-time initialisation hook.
-- Captures the global `mw_interface` and the supplied options in module-level
-- locals, then publishes this library as `mw.text` and as
-- `package.loaded['mw.text']`.
-- @param opts  options table; other functions in this file read its `comma`,
--              `and` and `ellipsis` fields
function mwtext.setupInterface( opts )
	-- Boilerplate: the hook removes itself, and the interface global is
	-- cleared as soon as it has been captured.
	mwtext.setupInterface = nil
	php, mw_interface = mw_interface, nil
	options = opts

	-- Register this library in the "mw" global
	if not mw then
		mw = {}
	end
	mw.text = mwtext

	package.loaded['mw.text'] = mwtext
end
|
||
|
|
||
|
--- Strip leading and trailing characters from a string.
-- @param s        string to trim
-- @param charset  optional; text placed verbatim inside a `[...]` set of a
--                 ustring pattern. Defaults to tab, CR, LF, form feed, space.
-- @return the trimmed string
function mwtext.trim( s, charset )
	if not charset then
		charset = '\t\r\n\f '
	end

	local run = '[' .. charset .. ']*'
	-- The lazy capture keeps whatever lies between the leading and trailing
	-- runs; only the first gsub result (the string) is returned.
	local trimmed = mw.ustring.gsub( s, '^' .. run .. '(.-)' .. run .. '$', '%1' )
	return trimmed
end
|
||
|
|
||
|
-- Characters that mwtext.encode() replaces with a fixed entity. Each value
-- must be the entity text itself, not the character it stands for; otherwise
-- encoding is a no-op and the reverse table below cannot recognise entities.
-- mwtext.encode() and mwtext.decode() add further entries at runtime.
local htmlencode_map = {
	['>'] = '&gt;',
	['<'] = '&lt;',
	['&'] = '&amp;',
	['"'] = '&quot;',
	["'"] = '&#039;',
	['\194\160'] = '&nbsp;', -- U+00A0 NO-BREAK SPACE as UTF-8 bytes
}
-- Reverse lookup (entity -> character), seeded from the table above.
local htmldecode_map = {}
for char, entity in pairs( htmlencode_map ) do
	htmldecode_map[entity] = char
end
-- Named-entity table; fetched on first use by mwtext.decode().
local decode_named_entities = nil
|
||
|
|
||
|
--- Replace characters of a string with HTML entities.
-- @param s        string to encode
-- @param charset  optional; text placed verbatim inside a `[...]` set of a
--                 ustring pattern, selecting the characters to replace.
--                 Defaults to < > & " ' and the no-break space.
-- @return the encoded string
function mwtext.encode( s, charset )
	if not charset then
		charset = '<>&"\'\194\160'
	end

	-- Entities come from htmlencode_map; a character seen for the first time
	-- gets a decimal numeric reference, which is then cached in both maps.
	local function toEntity( ch )
		local entity = htmlencode_map[ch]
		if not entity then
			entity = string.format( '&#%d;', mw.ustring.codepoint( ch ) )
			htmlencode_map[ch] = entity
			htmldecode_map[entity] = ch
		end
		return entity
	end

	local encoded = mw.ustring.gsub( s, '[' .. charset .. ']', toEntity )
	return encoded
end
|
||
|
|
||
|
--- Replace HTML entities in a string with the characters they denote.
-- Numeric references (`&#NNN;`, `&#xHHH;`) up to U+10FFFF are always decoded.
-- Named entities are looked up in htmldecode_map or, when
-- decodeNamedEntities is truthy, in the table returned by
-- php.getEntityTable() with htmldecode_map as fallback.
-- Entities that cannot be resolved are left in the text unchanged.
-- @param s                    string to decode
-- @param decodeNamedEntities  optional; truthy to use the full entity table
-- @return the decoded string
function mwtext.decode( s, decodeNamedEntities )
	local dec
	if decodeNamedEntities then
		if decode_named_entities == nil then
			-- Fetched once; lookups that miss fall through to htmldecode_map.
			decode_named_entities = php.getEntityTable()
			setmetatable( decode_named_entities, { __index = htmldecode_map } )
		end
		dec = decode_named_entities
	else
		dec = htmldecode_map
	end

	-- string.gsub is safe here, because only ASCII chars are in the pattern
	s = string.gsub( s, '(&(#?x?)([a-zA-Z0-9]+);)', function ( m, flg, name )
		if not dec[m] then
			local n = nil
			if flg == '#' then
				n = tonumber( name, 10 )
			elseif flg == '#x' then
				n = tonumber( name, 16 )
			end
			if n and n <= 0x10ffff then
				local char = mw.ustring.char( n )
				if char then
					htmldecode_map[m] = char
					-- Only remember the reverse mapping when the character has
					-- no entity yet. Overwriting an existing entry would make
					-- later mwtext.encode() output depend on which spellings
					-- happened to be decoded earlier.
					if htmlencode_map[char] == nil then
						htmlencode_map[char] = m
					end
				end
			end
		end
		-- A nil result makes gsub keep the original match.
		return dec[m]
	end )
	return s
end
|
||
|
|
||
|
-- Characters that mwtext.nowiki() escapes wherever they occur. Each value is
-- the decimal character reference for its key, so the escaped text renders
-- the same but is no longer interpreted as wikitext or HTML markup.
local nowikiRepl1 = {
	['"'] = '&#34;',
	['&'] = '&#38;',
	["'"] = '&#39;',
	['<'] = '&#60;',
	['='] = '&#61;',
	['>'] = '&#62;',
	['['] = '&#91;',
	[']'] = '&#93;',
	['{'] = '&#123;',
	['|'] = '&#124;',
	['}'] = '&#125;',
}
|
||
|
|
||
|
-- Characters that mwtext.nowiki() escapes only directly after a newline.
-- The newline is part of both key and value so it survives the replacement;
-- the character itself becomes its decimal character reference.
local nowikiRepl2 = {
	["\n#"] = "\n&#35;",
	["\n*"] = "\n&#42;",
	["\n:"] = "\n&#58;",
	["\n;"] = "\n&#59;",
}
|
||
|
|
||
|
--- Escape a string so that it is not interpreted as wikitext.
-- Applies, in order: the per-character table nowikiRepl1, the
-- after-newline table nowikiRepl2 (the start of the string counts as
-- following a newline), and escapes for "://", "ISBN " and "RFC ".
-- @param s  string to escape
-- @return the escaped string
function mwtext.nowiki( s )
	-- string.gsub is safe here, because we're only caring about ASCII chars
	s = string.gsub( s, '["&\'<=>%[%]{|}]', nowikiRepl1 )
	-- A newline is prepended so that one pattern also covers the first
	-- character; string.sub( ..., 2 ) removes it again.
	s = string.sub( string.gsub( '\n' .. s, '\n[#*:;]', nowikiRepl2 ), 2 )
	-- The replacements below must differ from what they match, otherwise the
	-- text is returned with these sequences still intact.
	s = string.gsub( s, '://', '&#58;//' )
	s = string.gsub( s, 'ISBN ', 'ISBN&#32;' )
	s = string.gsub( s, 'RFC ', 'RFC&#32;' )

	return s
end
|
||
|
|
||
|
--- Build the text of an HTML-style tag.
-- May be called positionally as tag( name, attrs, content ) or with a single
-- table carrying the fields `name`, `attrs` and `content`.
-- @param name     tag name (string), or the table of named arguments
-- @param attrs    optional table of attributes. Keys must be strings without
--                 whitespace, '/', '<', '>', quotes or '='. A boolean value
--                 emits the bare key when true and nothing when false;
--                 string and number values are passed through mwtext.encode().
-- @param content  nil: opening tag only; false: self-closing tag;
--                 string or number: opening tag, content, closing tag
-- @return the assembled string
function mwtext.tag( name, attrs, content )
	local named = type( name ) == 'table'
	if named then
		name, attrs, content = name.name, name.attrs, name.content
		checkTypeForNamedArg( 'tag', 'name', name, 'string' )
		checkTypeForNamedArg( 'tag', 'attrs', attrs, 'table', true )
	else
		checkType( 'tag', 1, name, 'string' )
		checkType( 'tag', 2, attrs, 'table', true )
	end

	local parts = { '<' .. name }
	if attrs then
		for key, value in pairs( attrs ) do
			local keyType = type( key )
			if keyType ~= 'string' then
				error( "bad named argument attrs to 'tag' (keys must be strings, found " .. keyType .. ")", 2 )
			end
			if string.find( key, '[\t\r\n\f /<>"\'=]' ) then
				error( "bad named argument attrs to 'tag' (invalid key '" .. key .. "')", 2 )
			end

			local valueType = type( value )
			if valueType == 'string' or valueType == 'number' then
				parts[#parts + 1] = string.format( ' %s="%s"', key, mwtext.encode( tostring( value ) ) )
			elseif valueType == 'boolean' then
				if value then
					parts[#parts + 1] = ' ' .. key
				end
			else
				error( "bad named argument attrs to 'tag' (value for key '" .. key .. "' may not be " .. valueType .. ")", 2 )
			end
		end
	end

	if content == nil then
		parts[#parts + 1] = '>'
	elseif content == false then
		parts[#parts + 1] = ' />'
	else
		local contentType = type( content )
		if contentType == 'string' or contentType == 'number' then
			parts[#parts + 1] = '>'
			parts[#parts + 1] = content
			parts[#parts + 1] = '</' .. name .. '>'
		elseif named then
			-- Unsupported content type: handed to the type checker with the
			-- list of accepted types (presumably raising the usual
			-- bad-argument error; confirm against libraryUtil).
			checkTypeForNamedArg( 'tag', 'content', content, 'string, number, nil, or false' )
		else
			checkType( 'tag', 3, content, 'string, number, nil, or false' )
		end
	end

	return table.concat( parts )
end
|
||
|
|
||
|
--- Pass a string to the host interface's `unstrip` function.
-- @param s  value forwarded unchanged
-- @return whatever php.unstrip returns
function mwtext.unstrip( s )
	local unstrip = php.unstrip
	return unstrip( s )
end
|
||
|
|
||
|
--- Split a string into a table of substrings.
-- Collects everything produced by mwtext.gsplit() with the same arguments.
-- @param text     string to split
-- @param pattern  separator, passed through to mwtext.gsplit()
-- @param plain    passed through to mwtext.gsplit()
-- @return sequence table of the pieces, in order
function mwtext.split( text, pattern, plain )
	local pieces = {}
	local count = 0
	for piece in mwtext.gsplit( text, pattern, plain ) do
		count = count + 1
		pieces[count] = piece
	end
	return pieces
end
|
||
|
|
||
|
--- Return an iterator over the pieces of a string between separators.
-- @param text     string to split
-- @param pattern  separator, handed to mw.ustring.find()
-- @param plain    handed to mw.ustring.find() as its `plain` argument
-- @return iterator function (plus two nils) for use in a generic `for`;
--         each call yields the next piece and it yields nothing once done
function mwtext.gsplit( text, pattern, plain )
	-- `pos` is the character index where the next piece starts; nil once the
	-- iteration has finished.
	local pos = 1
	local len = mw.ustring.len( text )

	local function nextPiece()
		if not pos then
			return
		end

		local first, last = mw.ustring.find( text, pattern, pos, plain )

		if not first then
			-- No further separator: the remainder is the final piece.
			local rest = mw.ustring.sub( text, pos )
			pos = nil
			return rest
		end

		if last < first then
			-- Empty separator!
			-- Hand out a piece ending at the match position and step past it,
			-- stopping once the end of the text has been reached.
			local piece = mw.ustring.sub( text, pos, first )
			if first < len then
				pos = first + 1
			else
				pos = nil
			end
			return piece
		end

		-- Ordinary match: the piece is whatever precedes the separator.
		local piece = ''
		if first > pos then
			piece = mw.ustring.sub( text, pos, first - 1 )
		end
		pos = last + 1
		return piece
	end

	return nextPiece, nil, nil
end
|
||
|
|
||
|
--- Join a list into prose-style text, e.g. "a, b and c".
-- All items but the last are joined with `separator`; the last item is
-- attached with `conjunction`. The list itself is never written to, so it
-- stays intact if joining raises an error and tables that reject writes
-- can be passed.
-- @param list         sequence of strings or numbers
-- @param separator    optional; defaults to options.comma
-- @param conjunction  optional; defaults to options['and']
-- @return the joined string; '' for an empty list
function mwtext.listToText( list, separator, conjunction )
	separator = separator or options.comma
	conjunction = conjunction or options['and']
	local n = #list

	if n > 1 then
		return table.concat( list, separator, 1, n - 1 ) .. conjunction .. list[n]
	end

	return tostring( list[1] or '' )
end
|
||
|
|
||
|
--- Shorten a string to a number of characters, marking the cut.
-- A positive `length` keeps the start of the text and appends the ellipsis;
-- otherwise the end of the text is kept and the ellipsis is prepended.
-- @param text          string to shorten
-- @param length        number of characters; the sign selects the kept end
-- @param ellipsis      optional marker; defaults to options.ellipsis
-- @param adjustLength  if truthy, the ellipsis counts towards `length`
-- @return the shortened string, or `text` itself when it already fits or
--         when the result would not be shorter than `text`
function mwtext.truncate( text, length, ellipsis, adjustLength )
	local textLen = mw.ustring.len( text )
	local limit = math.abs( length )
	if textLen <= limit then
		return text
	end

	ellipsis = ellipsis or options.ellipsis

	-- Characters of the budget reserved for the ellipsis.
	local reserved = 0
	if adjustLength then
		reserved = mw.ustring.len( ellipsis )
	end

	local result
	if limit <= reserved then
		result = ellipsis
	elseif length > 0 then
		result = mw.ustring.sub( text, 1, length - reserved ) .. ellipsis
	else
		result = ellipsis .. mw.ustring.sub( text, length + reserved )
	end

	-- Only hand back the truncated form when it is actually shorter.
	if mw.ustring.len( result ) < textLen then
		return result
	end
	return text
end
|
||
|
|
||
|
-- Module result: the library table defined at the top of this file.
return mwtext
|