Merge "Add text module"

This commit is contained in:
jenkins-bot 2013-03-27 16:55:07 +00:00 committed by Gerrit Code Review
commit a85275592c
8 changed files with 565 additions and 0 deletions

View file

@ -112,6 +112,7 @@ $wgAutoloadClasses['Scribunto_LuaUstringLibrary'] = $dir.'engines/LuaCommon/Ustr
$wgAutoloadClasses['Scribunto_LuaLanguageLibrary'] = $dir.'engines/LuaCommon/LanguageLibrary.php';
$wgAutoloadClasses['Scribunto_LuaMessageLibrary'] = $dir.'engines/LuaCommon/MessageLibrary.php';
$wgAutoloadClasses['Scribunto_LuaTitleLibrary'] = $dir.'engines/LuaCommon/TitleLibrary.php';
$wgAutoloadClasses['Scribunto_LuaTextLibrary'] = $dir.'engines/LuaCommon/TextLibrary.php';
/***** Configuration *****/

View file

@ -315,6 +315,7 @@ WIKI;
'engines/LuaCommon/UstringLibraryTest.php',
'engines/LuaCommon/MessageLibraryTest.php',
'engines/LuaCommon/TitleLibraryTest.php',
'engines/LuaCommon/TextLibraryTest.php',
);
foreach ( $tests as $test ) {
$files[] = dirname( __FILE__ ) .'/../tests/' . $test;

View file

@ -12,6 +12,7 @@ abstract class Scribunto_LuaEngine extends ScribuntoEngineBase {
'mw.language' => 'Scribunto_LuaLanguageLibrary',
'mw.message' => 'Scribunto_LuaMessageLibrary',
'mw.title' => 'Scribunto_LuaTitleLibrary',
'mw.text' => 'Scribunto_LuaTextLibrary',
);
/**

View file

@ -0,0 +1,31 @@
<?php
/**
 * PHP half of the Lua "mw.text" library: registers the Lua module and
 * provides the callbacks that the Lua side reaches through its interface
 * table.
 */
class Scribunto_LuaTextLibrary extends Scribunto_LuaLibraryBase {
	/**
	 * Register mw.text.lua with the engine, exposing the PHP callbacks and
	 * passing the content-language separators used by the Lua code.
	 */
	function register() {
		// Callbacks made available to the Lua module.
		$callbacks = array();
		$callbacks['unstrip'] = array( $this, 'textUnstrip' );
		$callbacks['getEntityTable'] = array( $this, 'getEntityTable' );

		// Localised strings, always taken from the content language.
		$comma = wfMessage( 'comma-separator' )->inContentLanguage()->text();
		$and = wfMessage( 'and' )->inContentLanguage()->text();
		$wordSeparator = wfMessage( 'word-separator' )->inContentLanguage()->text();
		$ellipsis = wfMessage( 'ellipsis' )->inContentLanguage()->text();

		$setupOptions = array(
			'comma' => $comma,
			// The conjunction is the "and" message followed by a word separator.
			'and' => $and . $wordSeparator,
			'ellipsis' => $ellipsis,
		);

		$this->getEngine()->registerInterface( 'mw.text.lua', $callbacks, $setupOptions );
	}

	/**
	 * Callback for mw.text.unstrip: run the parser's strip state over $s.
	 *
	 * @param string $s Text to unstrip (type-checked as argument 1 of 'unstrip')
	 * @return array Single-element array holding the unstripped text
	 */
	function textUnstrip( $s ) {
		$this->checkType( 'unstrip', 1, $s, 'string' );
		$stripState = $this->getParser()->mStripState;
		return array( $stripState->unstripBoth( $s ) );
	}

	/**
	 * Callback returning the HTML entity table, flipped so that entity
	 * strings are the keys and the characters are the values.
	 *
	 * @return array Single-element array holding the flipped table
	 */
	function getEntityTable() {
		// PHP 5.3 compat: ENT_HTML5 may be undefined, so it is only looked
		// up (by name) once we know the constant exists.
		$flags = defined( "ENT_HTML5" )
			? ( ENT_QUOTES | constant( "ENT_HTML5" ) )
			: ENT_QUOTES;
		$charToEntity = get_html_translation_table( HTML_ENTITIES, $flags, "UTF-8" );
		return array( array_flip( $charToEntity ) );
	}
}

View file

@ -21,6 +21,18 @@ function libraryUtil.checkTypeForIndex( index, value, expectType )
end
end
--- Validate the type of a named argument and raise an error if it is wrong.
-- @param name Name of the function being called (used in the message)
-- @param argName Name of the argument being checked (used in the message)
-- @param arg The value to check
-- @param expectType Type name that type( arg ) must equal
-- @param nilOk If true, a nil arg is accepted without further checks
function libraryUtil.checkTypeForNamedArg( name, argName, arg, expectType, nilOk )
	local actualType = type( arg )
	local acceptable = ( arg == nil and nilOk ) or actualType == expectType
	if not acceptable then
		-- Level 3 attributes the error to the caller of the function that
		-- called this checker.
		error(
			string.format( "bad named argument %s to '%s' (%s expected, got %s)",
				argName, name, expectType, actualType
			),
			3
		)
	end
end
function libraryUtil.makeCheckSelfFunction( libraryName, varName, selfObj, selfObjDesc )
return function ( self, method )
if self ~= selfObj then

View file

@ -0,0 +1,257 @@
-- Module table; returned at the end of the file and published as mw.text
-- by setupInterface.
local mwtext = {}
-- Interface table of PHP callbacks; assigned in setupInterface.
local php
-- Options passed to setupInterface (read as options.comma, options['and']
-- and options.ellipsis further down).
local options
-- Shared argument-checking helpers.
local util = require 'libraryUtil'
local checkType = util.checkType
local checkTypeForNamedArg = util.checkTypeForNamedArg
--- One-time initialisation hook.
-- Captures the PHP callback table from the global mw_interface, stores the
-- supplied options, and publishes this module as mw.text.
-- @param opts Options table kept for later use by the library functions
function mwtext.setupInterface( opts )
	-- Boilerplate: this function removes itself so it cannot be called again
	mwtext.setupInterface = nil

	-- Grab the interface table and clear the global in one step
	php, mw_interface = mw_interface, nil
	options = opts

	-- Register this library in the "mw" global and with require()
	package.loaded['mw.text'] = mwtext
	mw = mw or {}
	mw.text = mwtext
end
--- Strip leading and trailing characters from a string.
-- @param s String to trim
-- @param charset Characters to strip, inserted verbatim inside a pattern
--   bracket class; defaults to tab, CR, LF, form feed and space
-- @return The trimmed string
function mwtext.trim( s, charset )
	local class = '[' .. ( charset or '\t\r\n\f ' ) .. ']*'
	-- The lazy (.-) capture keeps everything between the two stripped runs.
	-- Assigning to a single local drops gsub's replacement count.
	local trimmed = mw.ustring.gsub( s, '^' .. class .. '(.-)' .. class .. '$', '%1' )
	return trimmed
end
-- Character -> entity map used by mwtext.encode. Entries for further
-- characters are added lazily (as numeric entities) by mwtext.encode.
local htmlencode_map = {
	['>'] = '&gt;',
	['<'] = '&lt;',
	['&'] = '&amp;',
	['"'] = '&quot;',
	["'"] = '&#039;',
	-- U+00A0 NO-BREAK SPACE (UTF-8 bytes C2 A0). The named entity is
	-- "&nbsp;"; a '#' after the ampersand would make it a malformed numeric
	-- reference.
	['\194\160'] = '&nbsp;',
}
-- Entity -> character map used by mwtext.decode, seeded as the inverse of
-- the encode map above.
local htmldecode_map = {}
for k, v in pairs( htmlencode_map ) do
	htmldecode_map[v] = k
end
-- Lazily-loaded table of named entities; filled in on first use by
-- mwtext.decode when named-entity decoding is requested.
local decode_named_entities = nil
--- Replace characters in a string with HTML entities.
-- @param s String to encode
-- @param charset Characters to replace, inserted verbatim inside a pattern
--   bracket class; defaults to < > & " ' and the no-break space
-- @return The encoded string
function mwtext.encode( s, charset )
	-- Look up the entity for one matched character, creating and caching a
	-- decimal numeric entity (in both maps) the first time it is needed.
	local function entityFor( ch )
		local entity = htmlencode_map[ch]
		if not entity then
			entity = string.format( '&#%d;', mw.ustring.codepoint( ch ) )
			htmlencode_map[ch] = entity
			htmldecode_map[entity] = ch
		end
		return entity
	end

	local class = '[' .. ( charset or '<>&"\'\194\160' ) .. ']'
	-- Assigning to a single local drops gsub's replacement count.
	local encoded = mw.ustring.gsub( s, class, entityFor )
	return encoded
end
--- Replace HTML entities in a string with the characters they stand for.
-- Entities present in the decode map and numeric entities (decimal "&#N;"
-- or hexadecimal "&#xN;", up to U+10FFFF) are always handled. Entities that
-- cannot be resolved are left untouched.
-- @param s String to decode
-- @param decodeNamedEntities If true, also consult the entity table
--   obtained from php.getEntityTable()
-- @return The decoded string
function mwtext.decode( s, decodeNamedEntities )
	local dec
	if decodeNamedEntities then
		if decode_named_entities == nil then
			-- Fetch the table once, and fall back to the basic map for
			-- anything it does not contain (including cached numeric
			-- entities added below).
			decode_named_entities = php.getEntityTable()
			setmetatable( decode_named_entities, { __index = htmldecode_map } )
		end
		dec = decode_named_entities
	else
		dec = htmldecode_map
	end
	-- string.gsub is safe here, because only ASCII chars are in the pattern
	s = string.gsub( s, '(&(#?x?)([a-zA-Z0-9]+);)', function ( m, flg, name )
		if not dec[m] then
			local n = nil
			if flg == '#' then
				n = tonumber( name, 10 )
			elseif flg == '#x' then
				n = tonumber( name, 16 )
			end
			if n and n <= 0x10ffff then
				n = mw.ustring.char( n )
				if n then
					-- Cache for later lookups. Deliberately NOT mirrored
					-- into htmlencode_map: doing so would let decoding
					-- e.g. "&#60;" replace the canonical "&lt;" encoding
					-- of "<" used by mwtext.encode.
					htmldecode_map[m] = n
				end
			end
		end
		-- A nil result makes gsub keep the original text of the match
		return dec[m]
	end )
	return s
end
-- Replacement table for single characters escaped by mwtext.nowiki; each
-- maps to its decimal numeric entity.
local nowikiRepl1 = {
	-- HTML-significant characters
	['<'] = '&#60;',
	['>'] = '&#62;',
	['&'] = '&#38;',
	['"'] = '&#34;',
	["'"] = '&#39;',
	-- Brackets, braces and pipe
	['['] = '&#91;',
	[']'] = '&#93;',
	['{'] = '&#123;',
	['}'] = '&#125;',
	['|'] = '&#124;',
	-- Equals sign
	['='] = '&#61;',
}

-- Replacement table for the characters # * : ; when they directly follow a
-- newline; the newline itself is preserved.
local nowikiRepl2 = {
	["\n*"] = "\n&#42;",
	["\n#"] = "\n&#35;",
	["\n;"] = "\n&#59;",
	["\n:"] = "\n&#58;",
}
--- Escape characters and sequences in a string by replacing them with
-- numeric entities, using the nowikiRepl1 / nowikiRepl2 tables plus a few
-- fixed sequences ("://", "ISBN ", "RFC ").
-- @param s String to escape
-- @return The escaped string
function mwtext.nowiki( s )
	-- string.gsub is safe here, because we're only caring about ASCII chars
	local escaped = string.gsub( s, '["&\'<=>%[%]{|}]', nowikiRepl1 )

	-- Prepend a newline so that a # * : ; at the very start of the text is
	-- matched like one at the start of any later line, then strip that
	-- extra newline again.
	local withNewline = string.gsub( '\n' .. escaped, '\n[#*:;]', nowikiRepl2 )
	escaped = string.sub( withNewline, 2 )

	escaped = string.gsub( escaped, '://', '&#58;//' )
	escaped = string.gsub( escaped, 'ISBN ', 'ISBN&#32;' )
	escaped = string.gsub( escaped, 'RFC ', 'RFC&#32;' )
	return escaped
end
--- Build the text of an HTML-style tag.
-- May be called positionally as tag( name, attrs, content ) or with a
-- single table { name = ..., attrs = ..., content = ... }.
-- @param name Tag name (string), or the table of named arguments
-- @param attrs Optional table of attributes. Keys must be strings without
--   whitespace or any of / < > " ' =. Values may be strings or numbers
--   (emitted as key="encoded value") or booleans (true emits the bare key,
--   false emits nothing).
-- @param content nil emits only the opening tag; false emits a
--   self-closing tag; a string or number emits opening tag, content
--   (unescaped) and closing tag
-- @return The tag text
function mwtext.tag( name, attrs, content )
	local named = type( name ) == 'table'
	if named then
		name, attrs, content = name.name, name.attrs, name.content
		checkTypeForNamedArg( 'tag', 'name', name, 'string' )
		checkTypeForNamedArg( 'tag', 'attrs', attrs, 'table', true )
	else
		checkType( 'tag', 1, name, 'string' )
		checkType( 'tag', 2, attrs, 'table', true )
	end

	local parts = { '<' .. name }

	for attrName, attrValue in pairs( attrs or {} ) do
		if type( attrName ) ~= 'string' then
			error( "bad named argument attrs to 'tag' (keys must be strings, found " .. type( attrName ) .. ")", 2 )
		end
		if string.match( attrName, '[\t\r\n\f /<>"\'=]' ) then
			error( "bad named argument attrs to 'tag' (invalid key '" .. attrName .. "')", 2 )
		end

		local valueType = type( attrValue )
		if valueType == 'string' or valueType == 'number' then
			parts[#parts+1] = string.format( ' %s="%s"', attrName, mwtext.encode( tostring( attrValue ) ) )
		elseif valueType ~= 'boolean' then
			error( "bad named argument attrs to 'tag' (value for key '" .. attrName .. "' may not be " .. valueType .. ")", 2 )
		elseif attrValue then
			-- Boolean true: bare attribute name. Boolean false: omitted.
			parts[#parts+1] = ' ' .. attrName
		end
	end

	local contentType = type( content )
	if contentType == 'string' or contentType == 'number' then
		parts[#parts+1] = '>'
		parts[#parts+1] = content
		parts[#parts+1] = '</' .. name .. '>'
	elseif content == nil then
		parts[#parts+1] = '>'
	elseif content == false then
		parts[#parts+1] = ' />'
	elseif named then
		-- No Lua type is called 'string, number, nil, or false', so this
		-- call exists to raise the standard error message.
		checkTypeForNamedArg( 'tag', 'content', content, 'string, number, nil, or false' )
	else
		checkType( 'tag', 3, content, 'string, number, nil, or false' )
	end

	return table.concat( parts )
end
--- Forward a string to the PHP-side 'unstrip' callback.
-- @param s Value passed unchanged to php.unstrip
-- @return Whatever php.unstrip returns
function mwtext.unstrip( s )
return php.unstrip( s )
end
--- Split a string into a table of substrings.
-- Collects everything produced by mwtext.gsplit called with the same
-- arguments.
-- @param text String to split
-- @param pattern Separator pattern
-- @param plain Passed through to mwtext.gsplit
-- @return Sequence table of the pieces
function mwtext.split( text, pattern, plain )
	local pieces, count = {}, 0
	for piece in mwtext.gsplit( text, pattern, plain ) do
		count = count + 1
		pieces[count] = piece
	end
	return pieces
end
--- Return an iterator over the pieces of a string between separator
-- matches.
-- @param text String to split
-- @param pattern Separator pattern, handed to mw.ustring.find
-- @param plain Passed as the 'plain' argument of mw.ustring.find
-- @return Iterator function (plus two nils) for use in a generic for loop
function mwtext.gsplit( text, pattern, plain )
	-- pos is where the next piece starts; nil once iteration has finished
	local pos = 1
	local textLen = mw.ustring.len( text )

	local function nextPiece()
		if not pos then
			return
		end

		local first, last = mw.ustring.find( text, pattern, pos, plain )

		if not first then
			-- No further separator: the remainder is the final piece
			local rest = mw.ustring.sub( text, pos )
			pos = nil
			return rest
		end

		if last < first then
			-- Empty separator! Take the text up to and including the
			-- character at the match position, and stop once that
			-- position reaches the end of the text.
			local piece = mw.ustring.sub( text, pos, first )
			pos = first < textLen and first + 1 or nil
			return piece
		end

		-- Ordinary separator: the piece is whatever precedes it (possibly
		-- nothing), and the next piece starts just after it.
		local piece = first > pos and mw.ustring.sub( text, pos, first - 1 ) or ''
		pos = last + 1
		return piece
	end

	return nextPiece, nil, nil
end
--- Join a list into text of the form "a, b, c and d".
-- The caller's table is only read, never modified, so it stays intact even
-- if concatenation raises an error (e.g. on a non-string, non-number item).
-- @param list Sequence table of strings or numbers
-- @param separator Text placed between all items except the last two;
--   defaults to options.comma
-- @param conjunction Text placed between the last two items; defaults to
--   options['and']
-- @return The joined text; '' for an empty list, tostring( list[1] ) for a
--   single item
function mwtext.listToText( list, separator, conjunction )
	separator = separator or options.comma
	conjunction = conjunction or options['and']
	local n = #list

	if n > 1 then
		-- Join everything but the last item with the separator, then attach
		-- the last item with the conjunction.
		return table.concat( list, separator, 1, n - 1 ) .. conjunction .. list[n]
	end

	return tostring( list[1] or '' )
end
--- Shorten a string to a given length, marking the cut with an ellipsis.
-- @param text String to truncate
-- @param length Number of characters to keep; positive keeps the start of
--   the text, negative keeps the end
-- @param ellipsis Marker added at the cut; defaults to options.ellipsis
-- @param adjustLength If true, the length of the ellipsis counts towards
--   'length'
-- @return The truncated text, or the original text when it already fits or
--   when the truncated form would not be shorter than the original
function mwtext.truncate( text, length, ellipsis, adjustLength )
	local textLen = mw.ustring.len( text )
	local limit = math.abs( length )
	if textLen <= limit then
		return text
	end

	ellipsis = ellipsis or options.ellipsis

	-- Characters of the budget consumed by the ellipsis itself
	local reserved = 0
	if adjustLength then
		reserved = mw.ustring.len( ellipsis )
	end

	local truncated
	if limit <= reserved then
		-- No room left for any of the text
		truncated = ellipsis
	elseif length > 0 then
		truncated = mw.ustring.sub( text, 1, length - reserved ) .. ellipsis
	else
		truncated = ellipsis .. mw.ustring.sub( text, length + reserved )
	end

	-- Avoid "truncations" that are no shorter than the original
	if mw.ustring.len( truncated ) < textLen then
		return truncated
	end
	return text
end
return mwtext

View file

@ -0,0 +1,23 @@
<?php
/**
 * Test case running the Lua test module TextLibraryTests.lua.
 */
class Scribunto_LuaTextLibraryTests extends Scribunto_LuaEngineTestBase {
	protected static $moduleName = 'TextLibraryTests';

	/**
	 * In addition to the parent set-up, store a strip marker for the text
	 * 'ok' in mw.text.stripTest so the Lua tests can exercise unstrip.
	 */
	function setUp() {
		parent::setUp();

		// For unstrip test
		$interpreter = $this->getEngine()->getInterpreter();
		$assignMarker = $interpreter->loadString( 'mw.text.stripTest = ...', 'fortest' );
		$marker = $this->getEngine()->getParser()->insertStripItem( 'ok' );
		$interpreter->callFunction( $assignMarker, $marker );
	}

	/**
	 * Add this test's Lua module to the modules supplied by the parent.
	 *
	 * @return array Module name => file path
	 */
	function getTestModules() {
		$modules = parent::getTestModules();
		$modules += array(
			'TextLibraryTests' => __DIR__ . '/TextLibraryTests.lua',
		);
		return $modules;
	}
}

View file

@ -0,0 +1,239 @@
local testframework = require 'Module:TestFramework'
-- Attribute table for the mw.text.tag tests. Its __pairs metamethod yields
-- the keys in a fixed order so the expected tag text is deterministic.
local tagattrs = setmetatable(
	{ absent = false, present = true, key = 'value', n = 42 },
	{
		__pairs = function ( t )
			local order = { 'absent', 'present', 'key', 'n' }
			local idx = 0
			return function()
				idx = idx + 1
				local name = order[idx]
				if name ~= nil then
					return name, t[name]
				end
			end
		end
	}
)
-- Tests
-- Test cases handed to the test framework at the end of the file. Each
-- entry names the case, the function under test, its arguments, and the
-- expected return values.
local tests = {
-- mw.text.trim
{ name = 'trim',
func = mw.text.trim, args = { ' foo bar ' },
expect = { 'foo bar' }
},
{ name = 'trim right',
func = mw.text.trim, args = { 'foo bar ' },
expect = { 'foo bar' }
},
{ name = 'trim left',
func = mw.text.trim, args = { ' foo bar' },
expect = { 'foo bar' }
},
{ name = 'trim none',
func = mw.text.trim, args = { 'foo bar' },
expect = { 'foo bar' }
},
{ name = 'trim charset',
func = mw.text.trim, args = { 'xxx foo bar xxx', 'x' },
expect = { ' foo bar ' }
},
-- mw.text.encode
{ name = 'encode',
func = mw.text.encode, args = { '<b>foo "bar"</b> & \'baz\'' },
expect = { '&lt;b&gt;foo &quot;bar&quot;&lt;/b&gt; &amp; &#039;baz&#039;' }
},
{ name = 'encode charset',
func = mw.text.encode, args = { '<b>foo "bar"</b> & \'baz\'', 'aeiou' },
expect = { '<b>f&#111;&#111; "b&#97;r"</b> & \'b&#97;z\'' }
},
-- mw.text.decode, without and with named-entity decoding
{ name = 'decode',
func = mw.text.decode,
args = { '&lt;&gt;&amp;&quot; &#102;&#111;&#x6f; &#x0066;&#00111;&#x6F; &hearts; &amp;quot;' },
expect = { '<>&" foo foo &hearts; &quot;' }
},
{ name = 'decode named',
func = mw.text.decode,
args = { '&lt;&gt;&amp;&quot; &#102;&#111;&#x6f; &#x0066;&#00111;&#x6F; &hearts; &amp;quot;', true },
expect = { '<>&" foo foo ♥ &quot;' }
},
-- mw.text.nowiki
{ name = 'nowiki',
func = mw.text.nowiki,
args = { '*"&\'<=>[]{|}#*:;\n*\n#\n:\n;\nhttp://example.com:80/\nRFC 123, ISBN 456' },
expect = {
'&#42;&#34;&#38;&#39;&#60;&#61;&#62;&#91;&#93;&#123;&#124;&#125;#*:;' ..
'\n&#42;\n&#35;\n&#58;\n&#59;\nhttp&#58;//example.com:80/' ..
'\nRFC&#32;123, ISBN&#32;456'
}
},
-- mw.text.tag, called with a table of named arguments
{ name = 'tag, simple',
func = mw.text.tag,
args = { { name = 'b' } },
expect = { '<b>' }
},
{ name = 'tag, simple with content',
func = mw.text.tag,
args = { { name = 'b', content = 'foo' } },
expect = { '<b>foo</b>' }
},
{ name = 'tag, simple self-closing',
func = mw.text.tag,
args = { { name = 'br', content = false } },
expect = { '<br />' }
},
{ name = 'tag, args',
func = mw.text.tag,
args = { { name = 'b', attrs = tagattrs } },
expect = { '<b present key="value" n="42">' }
},
{ name = 'tag, args with content',
func = mw.text.tag,
args = { { name = 'b', attrs = tagattrs, content = 'foo' } },
expect = { '<b present key="value" n="42">foo</b>' }
},
{ name = 'tag, args self-closing',
func = mw.text.tag,
args = { { name = 'br', attrs = tagattrs, content = false } },
expect = { '<br present key="value" n="42" />' }
},
-- mw.text.tag, called with positional arguments
{ name = 'tag, args, positional params',
func = mw.text.tag,
args = { 'b', tagattrs },
expect = { '<b present key="value" n="42">' }
},
{ name = 'tag, args with content, positional params',
func = mw.text.tag,
args = { 'b', tagattrs, 'foo' },
expect = { '<b present key="value" n="42">foo</b>' }
},
-- mw.text.unstrip; reads the mw.text.stripTest value
{ name = 'unstrip',
func = mw.text.unstrip, args = { mw.text.stripTest },
expect = { 'ok' }
},
-- mw.text.split
{ name = 'split, simple',
func = mw.text.split, args = { 'a,b,c,d', ',' },
expect = { { 'a', 'b', 'c', 'd' } }
},
{ name = 'split, no separator',
func = mw.text.split, args = { 'xxx', ',' },
expect = { { 'xxx' } }
},
{ name = 'split, empty string',
func = mw.text.split, args = { '', ',' },
expect = { { '' } }
},
{ name = 'split, with empty items',
func = mw.text.split, args = { ',,', ',' },
expect = { { '', '', '' } }
},
{ name = 'split, with empty items (1)',
func = mw.text.split, args = { 'x,,', ',' },
expect = { { 'x', '', '' } }
},
{ name = 'split, with empty items (2)',
func = mw.text.split, args = { ',x,', ',' },
expect = { { '', 'x', '' } }
},
{ name = 'split, with empty items (3)',
func = mw.text.split, args = { ',,x', ',' },
expect = { { '', '', 'x' } }
},
{ name = 'split, with empty items (4)',
func = mw.text.split, args = { ',x,x', ',' },
expect = { { '', 'x', 'x' } }
},
{ name = 'split, with empty items (5)',
func = mw.text.split, args = { 'x,,x', ',' },
expect = { { 'x', '', 'x' } }
},
{ name = 'split, with empty items (7)',
func = mw.text.split, args = { 'x,x,', ',' },
expect = { { 'x', 'x', '' } }
},
-- mw.text.split with patterns that can match the empty string
{ name = 'split, with empty pattern',
func = mw.text.split, args = { 'xxx', '' },
expect = { { 'x', 'x', 'x' } }
},
{ name = 'split, with empty pattern (2)',
func = mw.text.split, args = { 'xxx', ',?' },
expect = { { 'x', 'x', 'x' } }
},
-- mw.text.listToText
{ name = 'listToText (0)',
func = mw.text.listToText, args = { {} },
expect = { '' }
},
{ name = 'listToText (1)',
func = mw.text.listToText, args = { { 1 } },
expect = { '1' }
},
{ name = 'listToText (2)',
func = mw.text.listToText, args = { { 1, 2 } },
expect = { '1 and 2' }
},
{ name = 'listToText (3)',
func = mw.text.listToText, args = { { 1, 2, 3 } },
expect = { '1, 2 and 3' }
},
{ name = 'listToText (4)',
func = mw.text.listToText, args = { { 1, 2, 3, 4 } },
expect = { '1, 2, 3 and 4' }
},
{ name = 'listToText, alternate separator',
func = mw.text.listToText, args = { { 1, 2, 3, 4 }, '; ' },
expect = { '1; 2; 3 and 4' }
},
{ name = 'listToText, alternate conjunction',
func = mw.text.listToText, args = { { 1, 2, 3, 4 }, nil, ' or ' },
expect = { '1, 2, 3 or 4' }
},
-- mw.text.truncate
{ name = 'truncate, no truncation',
func = mw.text.truncate, args = { 'foobarbaz', 9 },
expect = { 'foobarbaz' }
},
{ name = 'truncate, no truncation (2)',
func = mw.text.truncate, args = { 'foobarbaz', -9 },
expect = { 'foobarbaz' }
},
{ name = 'truncate, tail truncation',
func = mw.text.truncate, args = { 'foobarbaz', 3 },
expect = { 'foo...' }
},
{ name = 'truncate, head truncation',
func = mw.text.truncate, args = { 'foobarbaz', -3 },
expect = { '...baz' }
},
{ name = 'truncate, avoid silly truncation',
func = mw.text.truncate, args = { 'foobarbaz', 8 },
expect = { 'foobarbaz' }
},
{ name = 'truncate, avoid silly truncation (2)',
func = mw.text.truncate, args = { 'foobarbaz', 6 },
expect = { 'foobarbaz' }
},
{ name = 'truncate, alternate ellipsis',
func = mw.text.truncate, args = { 'foobarbaz', 3, '!' },
expect = { 'foo!' }
},
{ name = 'truncate, with adjusted length',
func = mw.text.truncate, args = { 'foobarbaz', 6, nil, true },
expect = { 'foo...' }
},
{ name = 'truncate, with adjusted length (2)',
func = mw.text.truncate, args = { 'foobarbaz', -6, nil, true },
expect = { '...baz' }
},
{ name = 'truncate, ridiculously short',
func = mw.text.truncate, args = { 'foobarbaz', 1, nil, true },
expect = { '...' }
},
{ name = 'truncate, ridiculously short (2)',
func = mw.text.truncate, args = { 'foobarbaz', -1, nil, true },
expect = { '...' }
},
}
return testframework.getTestProvider( tests )