mediawiki-extensions-Scribunto/engines/LuaCommon/lualib/ustring/make-tables.php
Brad Jorsch aa4d72e3ff Fix uncontroversial phpcs errors
The following continue to be ignored:
* Generic.Arrays.DisallowLongArraySyntax.Found, because I'm not sure
  Scribunto is ready to abandon old version support in master.
* MediaWiki.ControlStructures.AssignmentInControlStructures.AssignmentInControlStructures,
  because it's overly strict for its purpose.

Squiz.Classes.ValidClassName.NotCamelCaps isn't ignored globally, we
just ignore it explicitly every place it's needed.

Change-Id: I307668da6ef7b3e23da19b1fd1e08914239b99b3
2016-05-18 16:31:28 -04:00

155 lines
4.3 KiB
PHP
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/php
<?php
// Refuse to run anywhere but the command-line SAPI.
if ( PHP_SAPI !== 'cli' ) {
	die( "This script may only be executed from the command line.\n" );
}

// $chars maps each code point 0..0x10ffff (minus the UTF-16 surrogate block
// 0xd800..0xdfff) to its UTF-8 encoding, in ascending code point order.
$chars = array();
for ( $i = 0; $i <= 0x10ffff; $i++ ) {
	if ( $i >= 0xd800 && $i <= 0xdfff ) {
		// Surrogates are not characters; leave them out of the table
		continue;
	}
	// Big-endian 32-bit integer == UTF-32BE code unit for this code point
	$utf32 = pack( 'N', $i );
	$chars[$i] = mb_convert_encoding( $utf32, 'UTF-8', 'UTF-32BE' );
}
### Uppercase and Lowercase mappings
// Writes lower.lua and upper.lua next to this script. Each file returns a Lua
// table mapping a character to its mb_strtolower()/mb_strtoupper() result;
// characters that the conversion leaves unchanged are omitted.
echo "Creating upper and lower tables...\n";
$L = fopen( __DIR__ . '/lower.lua', 'w' );
if ( !$L ) {
	// die( "string" ) would print to stdout and exit with status 0; report on
	// stderr and exit non-zero so callers can detect the failure.
	fwrite( STDERR, "Failed to open lower.lua\n" );
	exit( 1 );
}
$U = fopen( __DIR__ . '/upper.lua', 'w' );
if ( !$U ) {
	fclose( $L );
	fwrite( STDERR, "Failed to open upper.lua\n" );
	exit( 1 );
}
fprintf( $L, "-- This file is automatically generated by make-tables.php\n" );
fprintf( $L, "return {\n" );
fprintf( $U, "-- This file is automatically generated by make-tables.php\n" );
fprintf( $U, "return {\n" );
foreach ( $chars as $i => $c ) {
	$l = mb_strtolower( $c, 'UTF-8' );
	$u = mb_strtoupper( $c, 'UTF-8' );
	// Only record characters whose case mapping differs from themselves
	if ( $c !== $l ) {
		fprintf( $L, "\t[\"%s\"] = \"%s\",\n", $c, $l );
	}
	if ( $c !== $u ) {
		fprintf( $U, "\t[\"%s\"] = \"%s\",\n", $c, $u );
	}
}
fprintf( $L, "}\n" );
fprintf( $U, "}\n" );
fclose( $L );
fclose( $U );
### Pattern code mappings
echo "Creating charsets table...\n";
// $fh is written to by the rest of the script (including addRange(), which
// reaches it through `global`) and is closed at the very end.
$fh = fopen( __DIR__ . '/charsets.lua', 'w' );
if ( !$fh ) {
	// die( "string" ) would print to stdout and exit with status 0; report on
	// stderr and exit non-zero so callers can detect the failure.
	fwrite( STDERR, "Failed to open charsets.lua\n" );
	exit( 1 );
}
// Pattern class letter => array( PCRE fragment or null, class letters or null ).
// The fragment is matched against each single character to decide membership.
// The second element names other classes (by letter): characters matching
// those classes are not listed again for this class, and the generated Lua
// lookup falls back to those classes' tables instead.
$pats = array(
	// These should match the expressions in UstringLibrary::patternToRegex()
	'a' => array( '\p{L}', 'lu' ),
	'c' => array( '\p{Cc}', null ),
	'd' => array( '\p{Nd}', null ),
	'l' => array( '\p{Ll}', null ),
	'p' => array( '\p{P}', null ),
	's' => array( '\p{Xps}', null ),
	'u' => array( '\p{Lu}', null ),
	'w' => array( null, 'da' ), # '[\p{L}\p{Nd}]' exactly matches 'a' + 'd'
	// ASCII hex digits plus the fullwidth forms U+FF10-FF19, U+FF21-FF26 and
	// U+FF41-FF46, spelled as escapes so they cannot be silently lost or
	// confused with their ASCII look-alikes (a stray '---' here would make
	// '-' count as a hex digit).
	// NOTE(review): confirm this still agrees with UstringLibrary::patternToRegex().
	'x' => array( '[0-9A-Fa-f\x{FF10}-\x{FF19}\x{FF21}-\x{FF26}\x{FF41}-\x{FF46}]', null ),
	'z' => array( '\0', null ),
);
// Per-class list of Lua range conditions, filled in by addRange()
$ranges = array();
/**
 * Record that code points $start (inclusive) to $end (exclusive) belong to
 * pattern class $k.
 *
 * Runs of at least 10 code points that do not start in printable ASCII
 * (0x20..0x7e) are stored in the global $ranges[$k] as a Lua comparison on
 * `c`. Every other run is written to the global $fh right away, one
 * `[0x......] = 1,` table entry per code point.
 *
 * @param string $k Pattern class letter
 * @param int $start First code point of the run
 * @param int $end Code point just past the end of the run
 */
// @codingStandardsIgnoreLine MediaWiki.NamingConventions.PrefixedGlobalFunctions
function addRange( $k, $start, $end ) {
	// @codingStandardsIgnoreLine MediaWiki.NamingConventions.ValidGlobalName
	global $fh, $ranges;

	$startsInPrintableAscii = $start >= 0x20 && $start < 0x7f;
	$isShortRun = $end - $start < 10;

	// Speed/memory tradeoff: short or printable-ASCII runs become explicit entries
	if ( $startsInPrintableAscii || $isShortRun ) {
		for ( $codepoint = $start; $codepoint < $end; $codepoint++ ) {
			fprintf( $fh, "\t\t[0x%06x] = 1,\n", $codepoint );
		}
		return;
	}

	$ranges[$k][] = sprintf( "c >= 0x%06x and c < 0x%06x", $start, $end );
}
// Begin charsets.lua: open the Lua table `pats`, keyed by the byte value of
// each pattern class letter, and emit one sub-table per class.
fwrite( $fh, "-- This file is automatically generated by make-tables.php\n" );
fwrite( $fh, "local pats = {\n" );
foreach ( $pats as $k => $pp ) {
	$ranges[$k] = array();
	$re = $pp[0];
	if ( !$re ) {
		// Class has no regex of its own; it gets an empty table
		fprintf( $fh, "\t[0x%02x] = {},\n", ord( $k ) );
		continue;
	}

	// Characters matched by the classes named in $pp[1] are not listed here
	$excluded = array();
	if ( $pp[1] ) {
		foreach ( str_split( $pp[1] ) as $p ) {
			$excluded[] = $pats[$p][0];
		}
	}
	// With nothing to exclude, use 'fail': anchored, it never matches one character
	$re2 = $excluded ? implode( '|', $excluded ) : 'fail';
	$include = '/^' . $re . '$/u';
	$exclude = '/^' . $re2 . '$/u';

	fprintf( $fh, "\t[0x%02x] = {\n", ord( $k ) );
	// Collapse consecutive matching entries of $chars into [start, end) runs
	$rstart = null;
	foreach ( $chars as $i => $c ) {
		if ( preg_match( $include, $c ) && !preg_match( $exclude, $c ) ) {
			if ( $rstart === null ) {
				$rstart = $i;
			}
			continue;
		}
		if ( $rstart !== null ) {
			addRange( $k, $rstart, $i );
			$rstart = null;
		}
	}
	// Flush a run that reaches the end of the code space
	if ( $rstart !== null ) {
		addRange( $k, $rstart, 0x110000 );
	}
	fwrite( $fh, "\t},\n" );
}
// Add an empty table under the upper-case form of every class letter, then
// close the `pats` table.
foreach ( array_keys( $pats ) as $k ) {
	fprintf( $fh, "\t[0x%02x] = {},\n", ord( strtoupper( $k ) ) );
}
fwrite( $fh, "}\n" );
// For each class that has referenced classes and/or stored range conditions,
// emit a metatable whose __index function evaluates them for a missing key.
foreach ( $pats as $k => $pp ) {
	// Lookups into the tables of the classes named in $pp[1]
	$check = array();
	if ( $pp[1] ) {
		foreach ( str_split( $pp[1] ) as $p ) {
			$check[] = sprintf( "pats[0x%02x][k]", ord( $p ) );
		}
	}

	$body = '';
	if ( $ranges[$k] ) {
		// Range conditions compare against `c`; 0/0 is used when k is not a number
		$body = "\tlocal c = tonumber( k ) or 0/0;\n";
		$check = array_merge( $check, $ranges[$k] );
	}

	if ( !$check ) {
		// Nothing to test beyond the explicit entries: no metatable needed
		continue;
	}

	$body .= "\treturn " . implode( " or\n\t\t", $check );
	fprintf(
		$fh,
		"setmetatable( pats[0x%02x], { __index = function ( t, k )\n%s\nend } )\n",
		ord( $k ),
		$body
	);
}
// Each upper-case table answers lookups with the negation of the matching
// lower-case table (and with k itself when k is nil or false).
foreach ( $pats as $k => $pp ) {
	fprintf(
		$fh,
		"setmetatable( pats[0x%02x], { __index = function ( t, k ) return k and not pats[0x%02x][k] end } )\n",
		ord( strtoupper( $k ) ),
		ord( $k )
	);
}
// Trailing Lua code: for every table whose key is >= 0x61, store a value for
// each of 0x20..0x7e directly in the table, then return `pats`.
$luaTail = array(
	'',
	'-- For speed, cache printable ASCII characters in main tables',
	'for k, t in pairs( pats ) do',
	"\tif k >= 0x61 then",
	"\t\tfor i = 0x20, 0x7e do",
	"\t\t\tt[i] = t[i] or false",
	"\t\tend",
	"\tend",
	'end',
	'',
	'return pats',
);
fwrite( $fh, implode( "\n", $luaTail ) . "\n" );
fclose( $fh );