Linting and slight tweak to parser

- add a tweak to the parser (keep the value tokens in a declaration
  separate in the parsed tree to ease matching at render time)
- add error checking to encoding/decoding the templatestyles property
- apply some lint fixes suggested by Brion and Bryan

Change-Id: If60b91e119102c0f0f559fe7e5a4c421c94b7ff4
2024-11-28 02:00:20 +00:00 · 2016-04-10 09:41:46 -04:00 · 2016-04-10 09:41:46 -04:00 · dfa7e27b5a
parent b35763edbf
commit dfa7e27b5a
6 changed files with 126 additions and 63 deletions
--- a/CSSParser.php
+++ b/CSSParser.php
@ -27,47 +27,71 @@ class CSSParser {
 	function __construct( $css ) {
+		// Split $css into a flat token list with a single alternation. Alternatives
+		// are tried in order, so the catch-all single character must stay last.
+		// The pattern is compiled with /x: inline notes must use the (?# ... ) form,
+		// because a bare "#" comments out the rest of the line, closing parenthesis
+		// included, and the pattern then fails to compile.
 		preg_match_all( '/(
 			  [ \n\t]+
+				(?# Sequences of whitespace )
 			| \/\* (?: [^*]+ | \*[^\/] )* \*\/ [ \n\t]*
+				(?# Comments and any trailing whitespace )
-			| " (?: [^"\\\\\n]+ | \\\\\. )* ["\n]
+			| " (?: [^"\\\\\n]+ | \\\\. )* ["\n]
+				(?# Double-quoted string literals, ending at a newline when unclosed )
-			| \' (?: [^\'\\\\\n]+ | \\\\\. )* [\'\n]
+			| \' (?: [^\'\\\\\n]+ | \\\\. )* [\'\n]
+				(?# Single-quoted string literals, ending at a newline when unclosed )
 			| [+-]? (?: [0-9]* \. )? [0-9]+ (?: [_a-z][_a-z0-9-]* | % )?
-			| url [ \n\t]* \(
+				(?# Numerical literals - including optional trailing units or percent sign )
 			| @? -? (?: [_a-z] | \\\\[0-9a-f]{1,6} [ \n\t]? )
-			        (?: [_a-z0-9-]+ | \\\\[0-9a-f]{1,6} [ \n\t]? | [^\0-\177] )*
+			        (?: [_a-z0-9-]+ | \\\\[0-9a-f]{1,6} [ \n\t]? | [^\0-\177] )* (?: [ \n\t]* \( )?
+				(?# Identifiers - including leading `@` for at-rule blocks )
+				(?# A trailing open parenthesis is captured to match functional values )
 			| \# (?: [_a-z0-9-]+ | \\\\[0-9a-f]{1,6} [ \n\t]? | [^\0-\177] )*
+				(?# So-called hash literals )
 			| u\+ [0-9a-f]{1,6} (?: - [0-9a-f]{1,6} )?
+				(?# Unicode range literals )
 			| u\+ [0-9a-f?]{1,6}
-			| <!--
-			| -->
-			| .)/xis', $css, $match );
+				(?# Unicode mask literals )
+			| .)
+				(?# Any unmatched token is reduced to single characters )
+			/xis', $css, $match );

 		$space = false;
 		foreach ( $match[0] as $t ) {
-			if ( preg_match( '/^(?:[ \n\t]|\/\*|<!--|-->)/', $t ) ) {
+			if ( preg_match( '/^(?:[ \n\t]|\/\*)/', $t ) ) {
+
+				// Fold any sequence of whitespace and comments to a single space token
+
 				if ( !$space ) {
 					$space = true;
 					$this->tokens[] = ' ';
 					continue;
 				}
+
 			} else {
+
+				// Decode any hexadecimal escape sequence into its corresponding UTF-8
+				// character - our output is UTF-8 so the escaping is unnecessary, and
+				// decoding stops escapes from being used to obfuscate ASCII in
+				// identifiers in order to evade matching. The digits are hexadecimal,
+				// hence the "&#x" entity form, and /i mirrors the case-insensitive
+				// tokenizer above so that upper-case digits are decoded as well.
+
+				$t = preg_replace_callback( '/\\\\([0-9a-f]{1,6})[ \n\t]?/i', function( $match ) {
+						return html_entity_decode( '&#x'.$match[1].';', ENT_NOQUOTES, 'UTF-8' );
+				}, $t );
 				$space = false;
 				$this->tokens[] = $t;
+
 			}
 		}
 		$this->index = 0;
 	}

 	/**
 	 * Look ahead in the token list without consuming anything.
 	 *
 	 * @param int $i Offset from the current index (0 is the next unread token)
 	 * @return string|null The token at that offset, or null past the end of the list
 	 */
 	private function peek( $i ) {
-		if ( $this->index+$i >= count( $this->tokens ) )
+		if ( $this->index+$i >= count( $this->tokens ) ) {
 			return null;
+		}
 		return $this->tokens[$this->index+$i];
 	}

 	private function consume( $num = 1 ) {
 		if ( $num > 0 ) {
-			if ( $this->index+$num >= count( $this->tokens ) )
+			if ( $this->index+$num >= count( $this->tokens ) ) {
 				$num = count( $this->tokens ) - $this->index;
-			$text = implode( array_slice( $this->tokens, $this->index, $num ) );
+			}
+			$text = array_slice( $this->tokens, $this->index, $num );
 			$this->index += $num;
 			return $text;
 		}
@ -76,15 +100,17 @@ class CSSParser {

 	private function consumeTo( $delim ) {
 		$consume = 0;
-		while ( !in_array( $this->peek( $consume ), $delim ) )
+		while ( !in_array( $this->peek( $consume ), $delim ) ) {
 			$consume++;
+		}
 		return $this->consume( $consume );
 	}

 	/**
 	 * Consume the run of whitespace tokens at the current position, if any.
 	 *
 	 * Whitespace and comments are folded to a single ' ' token by the
 	 * constructor, so only that exact token is looked for here.
 	 *
 	 * @return mixed Result of consume() for the number of ' ' tokens found
 	 */
 	private function consumeWS() {
 		$consume = 0;
-		while ( $this->peek( $consume ) === ' ' )
+		while ( $this->peek( $consume ) === ' ' ) {
 			$consume++;
+		}
 		return $this->consume( $consume );
 	}

@ -99,7 +125,7 @@ class CSSParser {
 	 */
 	private function parseDecl() {
 		$this->consumeWS();
-		$name = $this->consume();
+		$name = $this->consume()[0];
 		$this->consumeWS();
 		if ( $this->peek( 0 )!=':' ) {
 			$this->consumeTo( [';', '}', null] );
@ -113,7 +139,7 @@ class CSSParser {
 		$this->consumeWS();
 		$value = $this->consumeTo( [';', '}', null] );
 		if ( $this->peek( 0 ) == ';' ) {
-			$value .= $this->consume();
+			$this->consume();
 			$this->consumeWS();
 		}
 		return [ $name => $value ];
@ -132,12 +158,15 @@ class CSSParser {
 		$decls = [];
 		while ( $this->peek( 0 ) !== null and $this->peek( 0 ) != '}' ) {
 			$decl = $this->parseDecl();
-			if ( $decl )
-				foreach ( $decl as $k => $d )
+			if ( $decl ) {
+				foreach ( $decl as $k => $d ) {
 					$decls[$k] = $d;
 				}
-		if ( $this->peek( 0 ) == '}' )
+			}
+		}
+		if ( $this->peek( 0 ) == '}' ) {
 			$this->consume();
+		}
 		return $decls;
 	}

@ -163,8 +192,9 @@ class CSSParser {
 				$this->consume();
 				$this->consumeWS();
 				$text = '';
-			} else
-				$text .= $this->consume();
+			} else {
+				$text .= $this->consume()[0];
+			}
 		}
 		$selectors[] = $text;
 		if ( $this->peek( 0 ) == '{' ) {
@ -204,37 +234,44 @@ class CSSParser {
 				$at = $this->consume();
 				$this->consumeWS();
 				$text = '';
-				while ( !in_array( $this->peek( 0 ), ['{', ';', null] ) )
-					$text .= $this->consume();
+				while ( !in_array( $this->peek( 0 ), ['{', ';', null] ) ) {
+					$text .= $this->consume()[0];
+				}
 				if ( $this->peek( 0 ) == '{' ) {
 					$this->consume();
 					$r = $this->rules( [ '}', null ] );
-					if ( $r )
+					if ( $r ) {
 						$atrules[] = [ "name"=>$at, "text"=>$text, "rules"=>$r ];
+					}
 				} else {
 					$atrules[] = [ "name"=>$at, "text"=>$text ];
 				}
 			} elseif ( $this->peek( 0 )[0] == '@' ) {
 				$at = $this->consume();
 				$text = '';
-				while ( !in_array( $this->peek( 0 ), ['{', ';', null] ) )
-					$text .= $this->consume();
+				while ( !in_array( $this->peek( 0 ), ['{', ';', null] ) ) {
+					$text .= $this->consume()[0];
+				}
 				if ( $this->peek( 0 ) == '{' ) {
 					$this->consume();
 					$decl = $this->parseDecls();
-					if ( $decl )
+					if ( $decl ) {
 						$atrules[] = [ "name"=>$at, "text"=>$text, "rules"=>[ "selectors"=>'', "decls"=>$decl ] ];
+					}
 				} else {
 					$atrules[] = [ "name"=>$at, "text"=>$text ];
 				}
-			} else
+			} else {
 				$rules[] = $this->parseRule();
+			}
 			$this->consumeWS();
 		}
-		if ( $rules )
+		if ( $rules ) {
 			$atrules[] = [ "name"=>'', "rules"=>$rules ];
-		if ( $this->peek( 0 ) !== null )
+		}
+		if ( $this->peek( 0 ) !== null ) {
 			$this->consume();
+		}
 		return $atrules;
 	}

--- a/CSSRenderer.php
+++ b/CSSRenderer.php
@ -27,14 +27,16 @@ class CSSRenderer {
 	 *  and defaults to the empty string.
 	 */
 	function add( $rules, $media = '' ) {
-		if ( !array_key_exists( $media, $this->bymedia ) )
+		if ( !array_key_exists( $media, $this->bymedia ) ) {
 			$this->bymedia[$media] = [];
+		}

 		foreach ( $rules as $at ) {
 			switch ( $at['name'] ) {
 				case '@media':
-					if ( $media == '' )
+					if ( $media == '' ) {
 						$this->add( $at['rules'], "@media ".$at['text'] );
+					}
 					break;
 				case '':
 					$this->bymedia[$media] = array_merge( $this->bymedia[$media], $at['rules'] );
@ -54,18 +56,20 @@ class CSSRenderer {
 		$css = '';

 		foreach ( $this->bymedia as $at => $rules ) {
-			if ( $at != '' )
+			if ( $at != '' ) {
 				$css .= "$at {\n";
+			}
 			foreach ( $rules as $rule ) {
 				$css .= implode( ',', $rule['selectors'] ) . "{";
 				foreach ( $rule['decls'] as $key => $value ) {
-					$css .= "$key:$value";
+					$css .= "$key:" . implode( '', $value ) . ';';
 				}
 				$css .= "} ";
 			}
-			if ( $at != '' )
+			if ( $at != '' ) {
 				$css .= "} ";
 			}
+		}

 		return $css;
 	}
--- a/TemplateStyles.hooks.php
+++ b/TemplateStyles.hooks.php
@ -11,27 +11,40 @@ class TemplateStylesHooks {
 	 * Register parser hooks
 	 */
 	public static function onParserFirstCallInit( &$parser ) {
-		$parser->setHook( 'templatestyles', array( 'TemplateStylesHooks', 'render' ) );
+		$parser->setHook( 'templatestyles', 'TemplateStylesHooks::render' );
 		return true;
 	}

+	/**
+	 * Decode a stored templatestyles property back into a parsed rule tree.
+	 *
+	 * @param string $blob Value produced by encodeToBlob()
+	 * @return array|bool The rule tree, or false when the blob is missing,
+	 *  cannot be decoded, or does not hold an array
+	 */
+	private static function decodeFromBlob( $blob ) {
+		// Nothing to decode; this also keeps non-string input away from gzdecode()
+		if ( !is_string( $blob ) || $blob === '' ) {
+			return false;
+		}
+		$tree = gzdecode( $blob );
+		if ( !$tree ) {
+			return false;
+		}
+		// NOTE(review): unserialize() is acceptable only while every blob comes
+		// from encodeToBlob(); confirm no user-supplied value can reach this.
+		$tree = unserialize( $tree );
+		// Callers iterate over the tree, so anything but an array is unusable
+		return is_array( $tree ) ? $tree : false;
+	}
+
+	/**
+	 * Encode a parsed rule tree for storage as the templatestyles property.
+	 *
+	 * @param array $tree Rule tree as returned by CSSParser::rules()
+	 * @return string|bool Gzip-compressed serialization, or false if gzencode() fails
+	 */
+	private static function encodeToBlob( $tree ) {
+		return gzencode( serialize( $tree ) );
+	}
+
 	public static function onOutputPageParserOutput( &$out, $parseroutput ) {
 		global $wgTemplateStylesNamespaces;
-		if ( $wgTemplateStylesNamespaces )
+		if ( $wgTemplateStylesNamespaces ) {
 			$namespaces = $wgTemplateStylesNamespaces;
-		else
+		} else {
 			$namespaces = [ NS_TEMPLATE ];
+		}

 		$renderer = new CSSRenderer();
 		$pages = [];

-		if ( $out->canUseWikiPage() )
-			$pages[$out->getWikiPage()->getID()] = 'self';
-
-		foreach ( $namespaces as $ns )
-			if ( array_key_exists( $ns, $parseroutput->getTemplates() ) )
-				foreach ( $parseroutput->getTemplates()[$ns] as $title => $pageid )
+		foreach ( $namespaces as $ns ) {
+			if ( array_key_exists( $ns, $parseroutput->getTemplates() ) ) {
+				foreach ( $parseroutput->getTemplates()[$ns] as $title => $pageid ) {
 					$pages[$pageid] = $title;
+				}
+			}
+		}

 		if ( count( $pages ) ) {
 			$db = wfGetDB( DB_SLAVE );
@ -43,22 +56,27 @@ class TemplateStylesHooks {
 				[ 'ORDER BY', 'pp_page' ]
+				// NOTE(review): this reads as a two-item list, not an 'ORDER BY' => 'pp_page'
+				// pair - if it is the options argument of a select(), confirm the sort applies.
 			);
 			foreach ( $res as $row ) {
-				$css = unserialize( gzdecode( $row->pp_value ) );
+				$css = self::decodeFromBlob( $row->pp_value );
+				if ( $css ) {
 					$renderer->add( $css );
 				}
+			}

 		}

 		$selfcss = $out->getProperty( 'templatestyles' );
 		if ( $selfcss ) {
-			$selfcss = unserialize( gzdecode( $selfcss ) );
+			// The property is the blob written by encodeToBlob(): decode it exactly
+			// once. Unpacking it first and then passing the resulting tree on to
+			// decodeFromBlob() made the second gzdecode() fail, so the page's own
+			// styles were never added.
+			$selfcss = self::decodeFromBlob( $selfcss );
+			if ( $selfcss ) {
 				$renderer->add( $selfcss );
 			}
+		}

 		$css = $renderer->render();
-		if ( $css )
+		if ( $css ) {
 			$out->addInlineStyle( $css );
 		}
+	}

 	/**
 	 * Parser hook for <templatestyles>.
@ -77,9 +95,13 @@ class TemplateStylesHooks {
 	public static function render( $input, $args, $parser, $frame ) {
 		$css = new CSSParser( $input );

-		if ( $css )
-			$parser->getOutput()->setProperty( 'templatestyles', gzencode( serialize( $css->rules() ) ) );
+		if ( $css ) {
+			$parser->getOutput()->setProperty( 'templatestyles', self::encodeToBlob( $css->rules() ) );
+		}

+		// TODO: The UX would benefit from the CSS being run through the
+		// hook for syntax highlighting rather that simply being presented
+		// as a preformatted block.
 		$html =
 			Html::openElement( 'div', [ 'class' => 'mw-templatestyles-doc' ] )
 			. Html::rawElement(
--- a/composer.json
+++ b/composer.json
@ -2,7 +2,7 @@
    "license": "LGPL-2.1+",
 	"require-dev": {
 		"jakub-onderka/php-parallel-lint": "0.9",
-		"mediawiki/mediawiki-codesniffer": "0.4.0"
+		"mediawiki/mediawiki-codesniffer": "0.5.0"
 	},
 	"scripts": {
 		"test": [