mediawiki-extensions-Templa.../CSSParser.php
Reedy ca778e2b98 Few style tweaks
Parameter documentation type hints

Remove unused RL module

Change-Id: I6e265c97eababa4bbfebb9402141664ad24b4e43
2016-10-30 22:56:44 +00:00

370 lines
8.9 KiB
PHP

<?php
/**
* @file
* @ingroup Extensions
*/
/**
* Represents a style sheet as a structured tree, organized
* in rule blocks nested in at-rule blocks.
*
* @class
*/
class CSSParser {
/** @var array $tokens */
private $tokens;
/** @var int $index */
private $index;
/**
* Parse and (minimally) validate the passed string as a CSS, and
* constructs an array of tokens for parsing, as well as an index
* into that array.
*
* Internally, the class behaves as a lexer.
*
* @param string $css
*/
function __construct( $css ) {
$this->index = 0;
preg_match_all( '/(
[ \n\t]+
(?# Sequences of whitespace )
| \/\* (?: [^*]+ | \*[^\/] )* \*\/ [ \n\t]*
(?# Comments and any trailing whitespace )
| " (?: [^"\\\\\n]+ | \\\\. )* ["\n]
(?# Double-quoted string literals (to newline when unclosed )
| \' (?: [^\'\\\\\n]+ | \\\\. )* [\'\n]
(?# Single-quoted string literals (to newline when unclosed )
| [+-]? (?: [0-9]* \. )? [0-9]+ (?: [_a-z][_a-z0-9-]* | % )?
(?# Numerical literals - including optional trailing units or percent sign )
| @? -? (?: [_a-z] | \\\\[0-9a-f]{1,6} [ \n\t]? )
(?: [_a-z0-9-]+ | \\\\[0-9a-f]{1,6} [ \n\t]? | [^\0-\177] )* (?: [ \n\t]* \( )?
(?# Identifiers - including leading `@` for at-rule blocks )
(?# Trailing open captures are captured to match functional values )
| \# (?: [_a-z0-9-]+ | \\\\[0-9a-f]{1,6} [ \n\t]? | [^\0-\177] )*
(?# So-called hatch literals )
| u\+ [0-9a-f]{1,6} (?: - [0-9a-f]{1,6} )?
(?# Unicode range literals )
| u\+ [0-9a-f?]{1,6}
(?# Unicode mask literals )
| .
(?# Any unmatched token is reduced to single characters )
)/xis',
$css,
$match
);
$inWhitespaceRun = false;
foreach ( $match[0] as $token ) {
if ( preg_match( '/^(?: [ \n\t] | \/\* )/x', $token ) ) {
// Fold any sequence of whitespace to a single space token
if ( !$inWhitespaceRun ) {
$inWhitespaceRun = true;
$this->tokens[] = ' ';
continue;
}
} else {
// Decode any hexadecimal escape character into its
// corresponding UTF-8 sequence - output is UTF-8 so the
// escaping is unnecessary and this prevents trying to
// obfuscate ASCII in identifiers to prevent matches.
$token = preg_replace_callback(
'/\\\\([0-9a-f]{1,6})[ \n\t]?/',
function( $match ) {
return html_entity_decode(
'&#x' . $match[1] . ';', ENT_NOQUOTES, 'UTF-8' );
},
$token
);
// Close unclosed string literals
if ( preg_match( '/^ ([\'"]) (.*) \n $/x', $token, $match ) ) {
$token = $match[1] . $match[2] . $match[1];
}
$inWhitespaceRun = false;
$this->tokens[] = $token;
}
}
}
/**
* Get a token from the input stream without advancing the current
* position.
*
* @param int $offset Offset from current stream location
* @return string|null Token or null if offset is past the end of the
* input stream
*/
private function peek( $offset = 0 ) {
if ( ( $this->index + $offset ) >= count( $this->tokens ) ) {
return null;
}
return $this->tokens[$this->index + $offset];
}
/**
* Take a list of tokens from the input stream.
*
* @param int $num Number of tokens to take
* @return array List of tokens
*/
private function consume( $num = 1 ) {
if ( $num > 0 ) {
if ( $this->index+$num >= count( $this->tokens ) ) {
$num = count( $this->tokens ) - $this->index;
}
$text = array_slice( $this->tokens, $this->index, $num );
$this->index += $num;
return $text;
}
return [];
}
/**
* Take tokens from the input stream up to but not including a delimiter
* from the provided list.
*
* The next token in the stream should always be validated after using
* this function as it may return early if the end of the token stream is
* reached.
*
* @param array $delim List of delimiters
* @return array List of tokens
*/
private function consumeTo( $delim ) {
if ( !in_array( null, $delim ) ) {
// Make sure we don't hit an infinte loop on malformed input
$delim[] = null;
}
$consume = 0;
while ( !in_array( $this->peek( $consume ), $delim ) ) {
$consume++;
}
return $this->consume( $consume );
}
/**
* Take consecutive whitespace tokens from the input stream.
*
* @return array List of whitespace tokens
*/
private function consumeWS() {
$consume = 0;
while ( $this->peek( $consume ) === ' ' ) {
$consume++;
}
return $this->consume( $consume );
}
/**
* Parse a CSS declaration.
*
* Grammar:
*
* decl : WS* IDENT WS* ':' TOKEN* ';'
* | WS* IDENT <error> ';' -> skip
* ;
*
* @return array [ name => value ]
*/
private function parseDecl() {
$this->consumeWS();
$name = $this->consume()[0];
$this->consumeWS();
if ( $this->peek() != ':' ) {
$this->consumeTo( [ ';', '}' ] );
if ( $this->peek() ) {
$this->consume();
$this->consumeWS();
}
return null;
}
$this->consume();
$this->consumeWS();
$value = $this->consumeTo( [ ';', '}' ] );
if ( $this->peek() === ';' ) {
$this->consume();
$this->consumeWS();
}
return [ $name => $value ];
}
/**
* Parse a list of CSS declarations.
*
* Grammar:
*
* decls : '}'
* | decl decls
* ;
*
* @return array List of decls
* @see parseDecl
*/
private function parseDecls() {
$decls = [];
while ( $this->peek() !== null && $this->peek() != '}' ) {
$decl = $this->parseDecl();
if ( $decl ) {
foreach ( $decl as $k => $d ) {
$decls[$k] = $d;
}
}
}
return $decls;
}
/**
* Parse a CSS rule.
*
* Grammar:
*
* rule : WS* selectors ';'
* | WS* selectors '{' decls
* ;
* selectors : TOKEN*
* | selectors ',' TOKEN*
* ;
*
* @param string $baseSelectors Selector to prepend to all rules to
* enforce scoping.
* @return array|null [ selectors => [ selector* ], decls => [ decl* ] ]
*/
public function parseRule( $baseSelectors ) {
$selectors = [];
$text = '';
$this->consumeWS();
while ( !in_array( $this->peek(), [ '{', ';', null ] ) ) {
if ( $this->peek() === ',' ) {
if ( $text !== '' ) {
$selectors[] = "{$baseSelectors}{$text}";
}
$this->consume();
$this->consumeWS();
$text = '';
} else {
$text .= $this->consume()[0];
}
}
if ( $text !== '' ) {
$selectors[] = "{$baseSelectors}{$text}";
}
if ( $this->peek() === '{' ) {
$this->consume();
return [
'selectors' => $selectors,
'decls' => $this->parseDecls()
];
}
if ( $this->peek( 0 ) ) {
$this->consume();
}
return null;
}
/**
* Parses the token array, and returns a tree representing the CSS
* suitable for feeding CSSRenderer objects.
*
* Grammar:
*
* anyrule : ATIDENT='@media' WS* TOKEN* '{' rules '}'
* | ATIDENT WS* TOKEN* ';'
* | ATIDENT WS* TOKEN* '{' decls '}'
* | rule
* ;
* rules : anyrule
* | rules anyrule
* ;
*
* Output:
*
* [ [ name=>ATIDENT? , text=>body? , rules=>rules? ]* ]
*
* @param string $baseSelectors Selector to prepend to all rules to
* enforce scoping.
* @param array $end An array of string representing tokens that can end
* the parse. Defaults to ending only at the end of the string.
* @return array A tree describing the CSS rule blocks.
*/
public function rules( $baseSelectors = '', $end = [ null ] ) {
$atrules = [];
$rules = [];
$this->consumeWS();
while ( !in_array( $this->peek(), $end ) ) {
if ( strtolower( $this->peek() ) === '@media' ) {
$at = $this->consume()[0];
$this->consumeWS();
$text = '';
while ( !in_array( $this->peek(), [ '{', ';', null ] ) ) {
$text .= $this->consume()[0];
}
if ( $this->peek() === '{' ) {
$this->consume();
$r = $this->rules( $baseSelectors, [ '}', null ] );
if ( $r ) {
$atrules[] = [
'name' => $at,
'text' => $text,
'rules' => $r,
];
}
} else {
$atrules[] = [
'name' => $at,
'text' => $text,
];
}
} elseif ( $this->peek()[0] === '@' ) {
$at = $this->consume()[0];
$text = '';
while ( !in_array( $this->peek(), [ '{', ';', null ] ) ) {
$text .= $this->consume()[0];
}
if ( $this->peek() === '{' ) {
$this->consume();
$decl = $this->parseDecls();
if ( $decl ) {
$atrules[] = [
'name' => $at,
'text' => $text,
'rules' => [
'selectors' => '',
'decls' => $decl,
],
];
}
} else {
$atrules[] = [
'name' => $at,
'text' => $text,
];
}
} elseif ( $this->peek() === '}' ) {
$this->consume();
} else {
$rules[] = $this->parseRule( $baseSelectors );
}
$this->consumeWS();
}
if ( $rules ) {
$atrules[] = [
'name' => '',
'rules' => $rules,
];
}
$this->consumeWS();
if ( $this->peek() !== null ) {
$this->consume();
}
return $atrules;
}
}