From 0880f444b1f8476dd2ab85720e5cfe4648f2f6fc Mon Sep 17 00:00:00 2001 From: Andrew Garrett Date: Wed, 11 Feb 2009 18:23:21 +0000 Subject: [PATCH] Abuse Filter Parser updates: * Use strcspn to scan ahead for long regions of uninteresting text in string handling (performance). * Remove cruft specific to my system in phpTest.php. * Remove a test that used incorrect syntax and was useless without adding variable support. --- AbuseFilter.parser.php | 12 +++++++----- phpTest.php | 12 ++++++++---- tests/whitespace.t | 2 -- 3 files changed, 15 insertions(+), 11 deletions(-) delete mode 100644 tests/whitespace.t diff --git a/AbuseFilter.parser.php b/AbuseFilter.parser.php index dde505a5a..3663af261 100644 --- a/AbuseFilter.parser.php +++ b/AbuseFilter.parser.php @@ -743,7 +743,13 @@ class AbuseFilterParser { return array( $tok, AFPToken::TString, $code, $offset ); } - if( $code[$offset] == '\\' ) { + // Performance: Use a PHP function (implemented in C) + // to scan ahead. + $addLength = strcspn( $code, $type."\\", $offset ); + if ($addLength) { + $tok .= substr( $code, $offset, $addLength ); + $offset += $addLength; + } elseif( $code[$offset] == '\\' ) { if( $code[$offset + 1] == '\\' ) $tok .= '\\'; elseif( $code[$offset + 1] == $type ) @@ -893,10 +899,6 @@ class AbuseFilterParser { 'unrecognisedtoken', $offset, array( substr( $code, $offset ) ) ); } - protected static function isDigitOrDot( $chr ) { - return ctype_alnum( $chr ) || $chr == '.'; - } - protected static function isValidIdSymbol( $chr ) { return ctype_alnum( $chr ) || $chr == '_'; } diff --git a/phpTest.php b/phpTest.php index b109aeda7..56a553bfb 100644 --- a/phpTest.php +++ b/phpTest.php @@ -4,7 +4,10 @@ * Runs tests against the PHP parser. */ -require( '/home/andrew/mediawiki/maintenance/commandLine.inc' ); +require_once ( getenv('MW_INSTALL_PATH') !== false + ? getenv('MW_INSTALL_PATH')."/maintenance/commandLine.inc" + : dirname( __FILE__ ) . 
'/../../maintenance/commandLine.inc' ); + $tester = new AbuseFilterParser; wfLoadExtensionMessages( 'AbuseFilter' ); @@ -16,9 +19,6 @@ $check = 0; $pass = 0; foreach( $tests as $test ) { - if( in_string( 'whitespace.t', $test ) ) - continue; // Skip it. Or add preset variables support to the parser - $result = substr($test,0,-2).".r"; $rule = trim(file_get_contents( $test )); @@ -45,6 +45,10 @@ foreach( $tests as $test ) { } } catch (AFPException $excep) { print "-FAILED - exception ".$excep->getMessage()." with input $rule\n"; + + // export + $vars = var_export( $tester->mTokens, true ); + file_put_contents( $test.'.parsed', $vars ); } } diff --git a/tests/whitespace.t b/tests/whitespace.t deleted file mode 100644 index cf595a46a..000000000 --- a/tests/whitespace.t +++ /dev/null @@ -1,2 +0,0 @@ -1 == FOO -FOO=1