DynamicPageList3/includes/LST.php

818 lines
25 KiB
PHP
Raw Normal View History

2020-11-22 20:00:48 +00:00
<?php
/**
* This is a modified and enhanced copy of a mediawiki extension called
*
* LabeledSectionTransclusion
*
* @link http://www.mediawiki.org/wiki/Extension:Labeled_Section_Transclusion Documentation
*
*
* @author Steve Sanbeg
* @copyright Copyright © 2006, Steve Sanbeg
* @license GPL-2.0-or-later
*
*
* This copy was made to avoid version conflicts between the two extensions.
* In this copy names were changed (wfLst.. --> wfDplLst..).
* So any version of LabeledSectionTransclusion can be installed together with DPL
*
* Enhancements were made to
* - allow inclusion of templates ("template swapping")
* - reduce the size of the transcluded text to a limit of <n> characters
*
*
* Thanks to Steve for his great work!
* -- Algorithmix
*/
2021-05-30 18:33:21 +00:00
2020-11-22 20:00:48 +00:00
namespace DPL;
use DPL\Lister\Lister;
2021-05-30 18:33:21 +00:00
use MediaWiki\MediaWikiServices;
2021-10-01 22:52:30 +00:00
use Parser;
use Title;
2020-11-22 20:00:48 +00:00
class LST {
	##############################################################
	# To do transclusion from an extension, we need to interact with the parser
	# at a low level. This is the general transclusion functionality
	##############################################################

	/**
	 * Register what we're working on in the parser, so we don't fall into a trap.
	 *
	 * @param Parser $parser
	 * @param string $part1 key identifying the transclusion that is being processed
	 * @return bool true if the key was newly registered, false if it was already
	 *  registered (i.e. a transclusion loop was detected)
	 *
	 * @suppress PhanUndeclaredProperty Use of Parser::mTemplatePath
	 */
	public static function open( $parser, $part1 ) {
		// Infinite loop test
		if ( isset( $parser->mTemplatePath[$part1] ) ) {
			wfDebug( __METHOD__ . ": template loop broken at '$part1'\n" );

			return false;
		}

		$parser->mTemplatePath[$part1] = 1;

		return true;
	}

	/**
	 * Finish processing the function.
	 *
	 * Removes the key that open() registered; logs a debug message if the key
	 * was not registered.
	 *
	 * @param Parser $parser
	 * @param string $part1 key that was passed to open()
	 *
	 * @suppress PhanUndeclaredProperty Use of Parser::mTemplatePath
	 */
	public static function close( $parser, $part1 ) {
		// Infinite loop test
		if ( !isset( $parser->mTemplatePath[$part1] ) ) {
			wfDebug( __METHOD__ . ": close unopened template loop at '$part1'\n" );

			return;
		}

		unset( $parser->mTemplatePath[$part1] );
	}

	/**
	 * Handle recursive substitution here, so we can break cycles, and set up
	 * return values so that edit sections will resolve correctly.
	 *
	 * @param Parser $parser
	 * @param string $text wikitext of the piece to be transcluded
	 * @param string $part1 key used for loop detection, see open()
	 * @param int $skiphead accepted for compatibility; not used by this method
	 * @param bool $recursionCheck if false, the text is preprocessed and the loop key is
	 *  released again; if true, the text is returned unprocessed and the key stays registered
	 *  (NOTE(review): the key is never closed in that case, so a second call with the same
	 *  key yields the loop warning - confirm that this is intended)
	 * @param int $maxLength if > 0, the text is truncated via limitTranscludedText()
	 * @param string $link link appended by limitTranscludedText() when the text was truncated
	 * @param bool $trim whether to trim() the result
	 * @param string[] $skipPattern regular expressions; every match is removed from the text
	 * @return string
	 */
	private static function parse( $parser, $text, $part1, $skiphead = 0, $recursionCheck = true, $maxLength = -1, $link = '', $trim = false, $skipPattern = [] ) {
		// if someone tries something like<section begin=blah>lst only</section>
		// text, may as well do the right thing.
		$text = str_replace( '</section>', '', $text );

		// if desired we remove portions of the text, esp. template calls
		foreach ( $skipPattern as $skipPat ) {
			$stripped = preg_replace( $skipPat, '', $text );
			// preg_replace() yields null for a broken pattern; keep the text in that case
			// instead of silently wiping it
			if ( $stripped !== null ) {
				$text = $stripped;
			}
		}

		if ( !self::open( $parser, $part1 ) ) {
			return "[[" . $parser->getTitle()->getPrefixedText() . "]]" . "<!-- WARNING: LST loop detected -->";
		}

		// Handle recursion here, so we can break cycles.
		if ( $recursionCheck == false ) {
			$text = $parser->preprocess( $text, $parser->getTitle(), $parser->getOptions() );
			self::close( $parser, $part1 );
		}

		if ( $maxLength > 0 ) {
			$text = self::limitTranscludedText( $text, $maxLength, $link );
		}

		return $trim ? trim( $text ) : $text;
	}

	##############################################################
	# And now, the labeled section transclusion
	##############################################################

	/**
	 * Parser tag hook for <section>.
	 * The section markers aren't paired, so we only need to remove them.
	 *
	 * @param string $in
	 * @param array $assocArgs
	 * @param Parser|null $parser
	 * @return string always the empty string
	 */
	private static function noop( $in, $assocArgs = [], $parser = null ) {
		return '';
	}

	/**
	 * Turn a section name into a regex fragment.
	 *
	 * - '**' matches any section name
	 * - '*' followed by text: the text is used as a regular expression
	 * - anything else is matched literally
	 *
	 * @param string $name
	 * @return string regex fragment, safe to embed between '/' delimiters
	 */
	private static function sectionNamePattern( $name ) {
		// the explicit empty check avoids an "Uninitialized string offset" warning
		if ( $name === '' || $name[0] !== '*' ) {
			return preg_quote( $name, '/' );
		}

		if ( $name === '**' ) {
			return '[^\/>"' . "']+";
		}

		return str_replace( '/', '\/', (string)substr( $name, 1 ) );
	}

	/**
	 * Generate a regex to match the section(s) we're interested in.
	 *
	 * Capture group 1 of the returned pattern is the section name found in the
	 * begin marker, group 2 is the text between the begin and the end marker.
	 *
	 * @param string $sec name of the section in the begin marker (see sectionNamePattern())
	 * @param string $to name of the section in the end marker; if empty, the end marker must
	 *  carry the same name that was matched in the begin marker
	 * @param bool &$any set to true if $sec starts with '*' (wildcard / regex match)
	 * @return string
	 */
	private static function createSectionPattern( $sec, $to, &$any ) {
		$sec = (string)$sec;
		$to = (string)$to;

		$any = $sec !== '' && $sec[0] === '*';
		$beginName = self::sectionNamePattern( $sec );

		if ( $to === '' ) {
			// same name as in the begin marker
			$endName = '\\1';
		} else {
			// non-capturing, so that the numbering of groups 1 and 2 is not affected
			$endName = '(?:' . self::sectionNamePattern( $to ) . ')';
		}

		$ws = "(?:\s+[^>]+)?";

		return "/<section$ws\s+(?i:begin)=['\"]?" . "($beginName)" . "['\"]?$ws\/?>(.*?)\n?<section$ws\s+(?:[^>]+\s+)?(?i:end)=" . "['\"]?" . $endName . "['\"]?" . "$ws\/?>/s";
	}

	/**
	 * Count headings in skipped text.
	 *
	 * Count skipped headings, so parser can skip them, to
	 * prevent wrong heading links.
	 *
	 * @param string $text
	 * @param int $limit headings whose line ends after this byte offset are not counted
	 * @return int
	 */
	private static function countHeadings( $text, $limit ) {
		// the trailing empty group () records the offset at which the heading line ends
		$pat = '/^(={1,6}).+\1\s*$()/im';
		$count = 0;
		$offset = 0;
		$m = [];

		while ( preg_match( $pat, $text, $m, PREG_OFFSET_CAPTURE, $offset ) ) {
			$headingEnd = $m[2][1];
			if ( $headingEnd > $limit ) {
				break;
			}

			$count++;
			$offset = $headingEnd;
		}

		return $count;
	}

	/**
	 * Fetch the wikitext of a page.
	 *
	 * @param Parser $parser
	 * @param string $page name of the page
	 * @param Title|null &$title receives the Title object built from $page (null if invalid)
	 * @param string &$text receives the page text; '' if the title is invalid; a wiki link
	 *  to the page if no text could be fetched
	 * @return bool false if no text could be fetched for a valid title, true otherwise
	 */
	public static function text( $parser, $page, &$title, &$text ) {
		$title = Title::newFromText( $page );

		if ( $title === null ) {
			$text = '';

			return true;
		}

		$text = $parser->fetchTemplateAndTitle( $title )[0];

		// if article doesn't exist, return a red link.
		// Explicit checks instead of "== false": a page consisting of "0" does exist.
		if ( $text === false || $text === null || $text === '' ) {
			$text = "[[" . $title->getPrefixedText() . "]]";

			return false;
		}

		return true;
	}

	/**
	 * Section inclusion - include all matching sections.
	 *
	 * @param Parser $parser
	 * @param string $page name of the page to take the sections from
	 * @param string $sec section name, see createSectionPattern()
	 * @param string $to name in the end marker, see createSectionPattern()
	 * @param bool $recursionCheck see parse()
	 * @param bool $trim whether to trim each piece
	 * @param string[] $skipPattern regular expressions whose matches are removed from each piece
	 * @return string[] one entry per matching section; for wildcard names each entry is
	 *  prefixed with the matched section name and '::'
	 */
	public static function includeSection( $parser, $page = '', $sec = '', $to = '', $recursionCheck = true, $trim = false, $skipPattern = [] ) {
		$output = [];

		if ( self::text( $parser, $page, $title, $text ) == false ) {
			$output[] = $text;

			return $output;
		}

		$any = false;
		$pat = self::createSectionPattern( $sec, $to, $any );

		$m = [];
		// 0 = no section found, false = broken (user supplied) pattern
		if ( !preg_match_all( $pat, $text, $m, PREG_PATTERN_ORDER ) ) {
			return $output;
		}

		foreach ( $m[2] as $nr => $piece ) {
			// $maxLength and $link must be passed explicitly; otherwise $trim and $skipPattern
			// end up in the wrong parameters of parse()
			$piece = self::parse( $parser, $piece, "#lst:{$page}|{$sec}", 0, $recursionCheck, -1, '', $trim, $skipPattern );
			$output[] = $any ? $m[1][$nr] . '::' . $piece : $piece;
		}

		return $output;
	}

	/**
	 * Truncate a portion of wikitext so that ..
	 * ... does not contain (open) html comments
	 * ... it is not larger than $limit bytes
	 * ... it is balanced in terms of braces, brackets and tags
	 * ... it is cut at a word boundary (white space) if possible
	 * ... it is not cut in the middle of a multi-byte UTF-8 character
	 * ... can be used as content of a wikitable field without spoiling the whole surrounding wikitext structure
	 *
	 * @param string $text the wikitext to be truncated
	 * @param int $limit limit of byte count for the result; a negative value means "no limit"
	 * @param string $link an optional link which will be appended to the text if it was truncated
	 *
	 * @return string the truncated text;
	 *         note that the returned text may be longer than the limit if this is necessary
	 *         to return something at all. We do not want to return an empty string if the input is not empty
	 *         if the text is already shorter than the limit, the text
	 *         will be returned without any checks for balance of tags
	 */
	public static function limitTranscludedText( $text, $limit, $link = '' ) {
		// a negative limit would make the "double the limit" recursion below run forever
		if ( $limit < 0 ) {
			return $text;
		}

		// if text is smaller than limit return complete text
		if ( $limit >= strlen( $text ) ) {
			return $text;
		}

		// otherwise strip html comments and check again
		$text = preg_replace( '/<!--.*?-->/s', '', $text );
		$length = strlen( $text );
		if ( $limit >= $length ) {
			return $text;
		}

		// search latest position with balanced brackets/braces
		// store also the position of the last preceding space
		$brackets = 0;
		$cbrackets = 0;
		$n0 = -1;
		$nb = 0;

		for ( $i = 0; $i < $limit; $i++ ) {
			$c = $text[$i];

			switch ( $c ) {
				case '[':
					$brackets++;
					break;
				case ']':
					$brackets--;
					break;
				case '{':
					$cbrackets++;
					break;
				case '}':
					$cbrackets--;
					break;
			}

			// we store the position if it is valid in terms of parentheses balancing
			if ( $brackets == 0 && $cbrackets == 0 ) {
				$n0 = $i;
				if ( $c == ' ' ) {
					$nb = $i;
				}
			}
		}

		if ( $n0 < 0 ) {
			if ( $limit == 0 ) {
				return $link;
			}

			// otherwise we recurse and try again with twice the limit size; this will lead to bigger output but
			// it will at least produce some output at all; otherwise the reader might think that there
			// is no information at all
			return self::limitTranscludedText( $text, $limit * 2, $link );
		}

		// there is a valid cut-off point; it is the largest one which is not above the limit
		// we try to cut off at a word boundary, this may lead to a shortening of max. 15 chars
		if ( $nb > 0 && $nb + 15 > $n0 ) {
			$n0 = $nb;
		}

		// never cut inside a multi-byte UTF-8 sequence: take the remaining continuation
		// bytes (10xxxxxx) of the last character along
		while ( $n0 + 1 < $length && ( ord( $text[$n0 + 1] ) & 0xC0 ) === 0x80 ) {
			$n0++;
		}

		$cut = substr( $text, 0, $n0 + 1 );

		// an open html comment would be fatal, but this should not happen as we already have
		// eliminated html comments at the beginning
		// some tags are critical: ref, pre, nowiki
		// if these tags were not balanced they would spoil the result completely
		// we enforce balance by appending the necessary amount of corresponding closing tags
		// currently we ignore the nesting, i.e. all closing tags are appended at the end.
		// This simple approach may fail in some cases ...
		$tags = [
			'ref' => 0,
			'pre' => 0,
			'nowiki' => 0
		];
		$matches = [];

		if ( preg_match_all( '#<\s*(/?ref|/?pre|/?nowiki)(\s+[^>]*?)*>#im', $cut, $matches, PREG_SET_ORDER ) ) {
			// calculate tag count (ignoring nesting)
			foreach ( $matches as $match ) {
				// the pattern is case insensitive, the counters are keyed in lower case
				$tag = strtolower( $match[1] );

				if ( $tag[0] === '/' ) {
					$tags[substr( $tag, 1 )]--;
				} elseif ( substr( $match[0], -2 ) !== '/>' ) {
					// self-closing tags like <ref name="x" /> need no closing tag
					$tags[$tag]++;
				}
			}

			// append missing closing tags - should the tags be ordered by precedence ?
			foreach ( $tags as $tagName => $level ) {
				for ( $missing = $level; $missing > 0; $missing-- ) {
					// avoid empty ref tag
					if ( $tagName == 'ref' && substr( $cut, strlen( $cut ) - 5 ) == '<ref>' ) {
						$cut = substr( $cut, 0, strlen( $cut ) - 5 );
					} else {
						$cut .= '</' . $tagName . '>';
					}
				}
			}
		}

		return $cut . $link;
	}

	/**
	 * Heading based inclusion: fetch a page, throw away html comments and hand the
	 * text over to extractHeadingFromText().
	 *
	 * @param Parser $parser
	 * @param string $page name of the page
	 * @param string $sec see extractHeadingFromText()
	 * @param string $to see extractHeadingFromText()
	 * @param string[] &$sectionHeading see extractHeadingFromText()
	 * @param bool $recursionCheck see parse()
	 * @param int $maxLength see parse()
	 * @param string $link see $cLink of extractHeadingFromText()
	 * @param bool $trim see parse()
	 * @param string[] $skipPattern see parse()
	 * @return string[] if the page text could not be fetched, a single entry with a link to the page
	 */
	public static function includeHeading( $parser, $page = '', $sec = '', $to = '', &$sectionHeading = [], $recursionCheck = true, $maxLength = -1, $link = 'default', $trim = false, $skipPattern = [] ) {
		$output = [];

		if ( self::text( $parser, $page, $title, $text ) == false ) {
			$output[0] = $text;

			return $output;
		}

		// throw away comments
		$text = preg_replace( '/<!--.*?-->/s', '', $text );

		return self::extractHeadingFromText( $parser, $page, $title, $text, $sec, $to, $sectionHeading, $recursionCheck, $maxLength, $link, $trim, $skipPattern );
	}

	/**
	 * Section inclusion - include all matching sections (return array).
	 *
	 * $sec selects the chapter(s):
	 * - ''          text before the first heading
	 * - '%0'        text before the first heading (whole text if there is no heading)
	 * - '%n'        the n-th chapter (n > 0); '%-1' the last chapter
	 * - '#…' / '@…' the rest is used as a regular expression for the heading text
	 * - other       plain heading text
	 *
	 * @param Parser $parser
	 * @param string $page name of the page, used for links and for the loop detection key
	 * @param Title|null $title not used by this method
	 * @param string $text wikitext to search in
	 * @param string $sec see above
	 * @param string $to optional heading at which the piece ends; it follows the plain/regex
	 *  mode of $sec and is ignored if it cannot be found
	 * @param string[] &$sectionHeading receives the heading text of the pieces that are returned
	 * @param bool $recursionCheck see parse()
	 * @param int $maxLength see parse()
	 * @param string $cLink 'default', a link text, a text containing 'img=' or a text containing
	 *  '%SECTION%'; used to build the link that is appended to a truncated piece
	 * @param bool $trim see parse()
	 * @param string[] $skipPattern see parse()
	 * @return string[]
	 */
	public static function extractHeadingFromText( $parser, $page, $title, $text, $sec = '', $to = '', &$sectionHeading = [], $recursionCheck = true, $maxLength = -1, $cLink = 'default', $trim = false, $skipPattern = [] ) {
		$continueSearch = true;
		$output = [ '' ];
		$n = 0;
		$nr = 0;

		// check if we are going to fetch the n-th section
		if ( preg_match( '/^%-?[1-9][0-9]*$/', $sec ) ) {
			$nr = (int)substr( $sec, 1 );
		}

		if ( preg_match( '/^%0$/', $sec ) ) {
			// transclude text before the first section
			$nr = -2;
		}

		// if the section name starts with a # or with a @ we use it as regexp, otherwise as plain string
		$isPlain = true;

		if ( $sec != '' && ( $sec[0] == '#' || $sec[0] == '@' ) ) {
			$sec = substr( $sec, 1 );
			$isPlain = false;
		}

		// key for the loop detection in parse(); built after the regexp marker was stripped
		$part1 = "#lsth:{$page}|{$sec}";

		$m = [];
		$head_len = 6;

		do {
			// Generate a regex to match the === classical heading section(s) === we're
			// interested in.
			$headLine = '';
			$begin_off = 0;
			$headStart = 0;

			if ( $sec == '' ) {
				$head_len = 6;
			} else {
				if ( $nr != 0 ) {
					$pat = '^(={1,6})\s*[^=\s\n][^\n=]*\s*\1\s*($)';
				} elseif ( $isPlain ) {
					$pat = '^(={1,6})\s*' . preg_quote( $sec, '/' ) . '\s*\1\s*($)';
				} else {
					$pat = '^(={1,6})\s*' . str_replace( '/', '\/', $sec ) . '\s*\1\s*($)';
				}

				if ( preg_match( '/' . $pat . '/im', $text, $m, PREG_OFFSET_CAPTURE ) ) {
					// the last group of the pattern is ($): its offset is the end of the heading line
					$begin_off = $m[substr_count( $pat, '(' )][1];
					$head_len = strlen( $m[1][0] );
					$headLine = trim( $m[0][0], "\n =\t" );
					$headStart = $m[1][1];
				} elseif ( $nr == -2 ) {
					// take whole article if no heading found
					$headStart = strlen( $text ) + 1;
				} else {
					// match failed
					return $output;
				}
			}

			// create a link symbol (arrow, img, ...) in case we have to cut the text block to maxLength
			$target = $page . '#' . $headLine;
			$link = $cLink;

			if ( $link == 'default' ) {
				$link = ' [[' . $target . '|..→]]';
			} elseif ( strpos( $link, 'img=' ) !== false ) {
				$link = str_replace( 'img=', "<linkedimage>page=" . $target . "\nimg=Image:", $link ) . "\n</linkedimage>";
			} elseif ( strpos( $link, '%SECTION%' ) === false ) {
				$link = ' [[' . $target . '|' . $link . ']]';
			} else {
				$link = str_replace( '%SECTION%', $target, $link );
			}

			if ( $nr == -2 ) {
				// output text before first section and done
				// max(): if the text starts with a heading there is no intro at all;
				// a length of -1 would return (almost) the whole text instead
				$piece = substr( $text, 0, max( 0, $headStart - 1 ) );
				$output[0] = self::parse( $parser, $piece, $part1, 0, $recursionCheck, $maxLength, $link, $trim, $skipPattern );

				return $output;
			}

			// null = end of the piece unknown (so far)
			$end_off = null;
			$mm = [];

			if ( $to != '' ) {
				// if $to is supplied, try and match it. If we don't match, just ignore it.
				if ( $isPlain ) {
					$pat = '^(={1,6})\s*' . preg_quote( $to, '/' ) . '\s*\1\s*$';
				} else {
					$pat = '^(={1,6})\s*' . str_replace( '/', '\/', $to ) . '\s*\1\s*$';
				}

				if ( preg_match( '/' . $pat . '/im', $text, $mm, PREG_OFFSET_CAPTURE, $begin_off ) ) {
					$end_off = $mm[0][1] - 1;
				}
			}

			if ( $end_off === null ) {
				if ( $nr != 0 ) {
					$pat = '^(={1,6})\s*[^\s\n=][^\n=]*\s*\1\s*$';
				} else {
					// next heading of the same or of a higher level
					$pat = '^(={1,' . $head_len . '})(?!=)\s*.*?\1\s*$';
				}

				if ( preg_match( '/' . $pat . '/im', $text, $mm, PREG_OFFSET_CAPTURE, $begin_off ) ) {
					$end_off = $mm[0][1] - 1;
				} elseif ( $sec == '' ) {
					$end_off = -1;
				}
			}

			$nhead = self::countHeadings( $text, $begin_off );

			wfDebug( "LSTH: head offset = $nhead" );

			if ( $end_off === null ) {
				$piece = substr( $text, $begin_off );
				$continueSearch = false;
			} else {
				if ( $end_off == -1 ) {
					return $output;
				}

				$piece = substr( $text, $begin_off, $end_off - $begin_off );
				if ( $sec == '' ) {
					$continueSearch = false;
				} else {
					// continue the search behind this piece
					$text = substr( $text, $end_off );
				}
			}

			if ( $nr > 1 ) {
				// skip until we reach the n-th section
				$nr--;
				continue;
			}

			if ( isset( $m[0][0] ) ) {
				$sectionHeading[$n] = $headLine;
			} else {
				$sectionHeading[0] = $headLine;
			}

			if ( $nr == 1 ) {
				// output n-th section and done
				$output[0] = self::parse( $parser, $piece, $part1, $nhead, $recursionCheck, $maxLength, $link, $trim, $skipPattern );
				break;
			}

			if ( $nr == -1 ) {
				if ( $end_off === null ) {
					// output last section and done
					$output[0] = self::parse( $parser, $piece, $part1, $nhead, $recursionCheck, $maxLength, $link, $trim, $skipPattern );
					break;
				}
			} else {
				// output section by name and continue search for another section with the same name
				$output[$n++] = self::parse( $parser, $piece, $part1, $nhead, $recursionCheck, $maxLength, $link, $trim, $skipPattern );
			}
		} while ( $continueSearch );

		return $output;
	}

	/**
	 * Template inclusion - find the place(s) where template1 is called,
	 * replace its name by template2, then expand template2 and return the result.
	 *
	 * We return an array containing all occurrences of the template call which match the
	 * condition "$mustMatch" and do NOT match the condition "$mustNotMatch" (if specified).
	 * We use a callback function to format retrieved parameters, accessible via
	 * $lister->formatTemplateArg().
	 *
	 * @param Parser $parser
	 * @param Lister $lister provides formatTemplateArg()
	 * @param int $dplNr passed through to formatTemplateArg()
	 * @param object $article read properties: mTitle, myDate, mUserLink
	 * @param string $template1 name of the template to look for; a leading '#' looks for a parser
	 *  function call, a leading '~' for an xml-tag extension call (both accept regexps)
	 * @param string $template2 name of the surrogate template; if it has the form
	 *  "<template1>:p1:p2…" the listed parameters are extracted from the call instead
	 * @param string $defaultTemplate template that is expanded if template1 is not called at all
	 * @param string $mustMatch regular expression the call must match ('' = no condition)
	 * @param string $mustNotMatch regular expression the call must not match ('' = no condition)
	 * @param bool $matchParsed when extracting parameters: match the conditions against
	 *  the parsed call instead of the raw wikitext
	 * @param string $catlist if not empty, passed to the surrogate template as %CATLIST%
	 * @return string[]
	 */
	public static function includeTemplate( $parser, Lister $lister, $dplNr, $article, $template1 = '', $template2 = '', $defaultTemplate = '', $mustMatch = '', $mustNotMatch = '', $matchParsed = false, $catlist = '' ) {
		$page = $article->mTitle->getPrefixedText();
		$date = $article->myDate;
		$user = $article->mUserLink;
		$title = Title::newFromText( $page );

		// get text and throw away html comments
		$text = preg_replace( '/<!--.*?-->/s', '', (string)$parser->fetchTemplateAndTitle( $title )[0] );

		if ( $template1 != '' && $template1[0] == '#' ) {
			// --------------------------------------------- looking for a parser function call
			$template1 = substr( $template1, 1 );
			$template2 = substr( $template2, 1 );
			$defaultTemplate = substr( $defaultTemplate, 1 );

			// when looking for parser function calls we accept regexp search patterns
			$text2 = preg_replace( '/\{\{\s*#(' . $template1 . ')(\s*[:}])/i', '°³²|%PFUNC%=\1\2|', $text );
			$tCalls = preg_split( '/°³²/', ' ' . $text2 );

			foreach ( $tCalls as $i => $tCall ) {
				// blank out the first colon of each piece
				$colon = strpos( $tCall, ':' );
				if ( $colon !== false ) {
					$tCalls[$i] = substr_replace( $tCall, ' ', $colon, 1 );
				}
			}
		} elseif ( $template1 != '' && $template1[0] == '~' ) {
			// --------------------------------------------- looking for an xml-tag extension call
			$template1 = substr( $template1, 1 );
			$template2 = substr( $template2, 1 );
			$defaultTemplate = substr( $defaultTemplate, 1 );

			// looking for tags
			$text2 = preg_replace( '/\<\s*(' . $template1 . ')\s*\>/i', '°³²|%TAG%=\1|%TAGBODY%=', $text );
			$tCalls = preg_split( '/°³²/', ' ' . $text2 );

			foreach ( $tCalls as $i => $tCall ) {
				// the closing tag (and everything behind it) ends the surrogate call
				$tCalls[$i] = preg_replace( '/\<\s*\/' . $template1 . '\s*\>.*/is', '}}', $tCall );
			}
		} else {
			// --------------------------------------------- looking for template call
			// we accept plain text as a template name, space or underscore are the same
			// the localized name for "Template:" may preceed the template name
			// the name may start with a different namespace for the surrogate template, followed by ::
			$contLang = MediaWikiServices::getInstance()->getContentLanguage();
			$nsNames = $contLang->getNamespaces();
			// index 10 is the one the original code used for the localized "Template" name
			$localTemplateNs = preg_quote( $nsNames[10] ?? 'Template', '/' );

			$tCalls = preg_split( '/\{\{\s*(Template:|' . $localTemplateNs . ':)?' . self::spaceOrUnderscore( preg_quote( $template1, '/' ) ) . '\s*[|}]/i', ' ' . $text );

			// We restore the first separator symbol (we had to include that symbol into the SPLIT, because we must make
			// sure that we only accept exact matches of the complete template name
			// (e.g. when looking for "foo" we must not accept "foo xyz")
			foreach ( $tCalls as $nr => $tCall ) {
				// a piece is empty if the text ends right behind the separator
				$separator = ( $tCall !== '' && $tCall[0] == '}' ) ? '}' : '|';
				$tCalls[$nr] = $separator . $tCall;
			}
		}

		$template1 = (string)$template1;
		$template2 = (string)$template2;

		$output = [];
		$extractParm = [];

		// check if we want to extract parameters directly from the call
		// in that case we won't invoke template2 but will directly return the extracted parameters
		// as a sequence of table columns;
		$prefixLength = strlen( $template1 ) + 1;
		if ( strlen( $template2 ) > strlen( $template1 ) && substr( $template2, 0, $prefixLength ) == ( $template1 . ':' ) ) {
			$extractParm = preg_split( '/:\s*/s', trim( substr( $template2, $prefixLength ) ) );
		}

		$columnCount = count( $extractParm );

		if ( count( $tCalls ) <= 1 ) {
			// template was not called (note that count will be 1 if there is no template invocation)
			if ( $columnCount > 0 ) {
				// if parameters are required directly: return empty columns
				$output[0] = $lister->formatTemplateArg( '', $dplNr, 0, true, -1, $article );

				for ( $i = 1; $i < $columnCount; $i++ ) {
					$output[0] .= "\n|" . $lister->formatTemplateArg( '', $dplNr, $i, true, -1, $article );
				}
			} else {
				// put a red link into the output
				$output[0] = $parser->preprocess( '{{' . $defaultTemplate . '|%PAGE%=' . $page . '|%TITLE%=' . $title->getText() . '|%DATE%=' . $date . '|%USER%=' . $user . '}}', $parser->getTitle(), $parser->getOptions() );
			}

			return $output;
		}

		$output[0] = '';
		$n = -1;
		$firstCall = true;

		// loop for all template invocations; the first piece is the text in front of the first call
		foreach ( array_slice( $tCalls, 1 ) as $tCall ) {
			if ( $columnCount == 0 ) {
				// normally we construct a call for template2 with the parameters of template1
				// find the end of the call: bracket level must be zero
				$cbrackets = 0;
				$templateCall = '{{' . $template2 . $tCall;
				$size = strlen( $templateCall );

				for ( $i = 0; $i < $size; $i++ ) {
					$c = $templateCall[$i];
					if ( $c == '{' ) {
						$cbrackets++;
					}

					if ( $c == '}' ) {
						$cbrackets--;
					}

					if ( $cbrackets != 0 ) {
						continue;
					}

					// $i is the position of the second closing brace, so this is the call without "}}"
					$invocation = substr( $templateCall, 0, $i - 1 );

					// if we must match a condition: test against it
					if ( ( $mustMatch == '' || preg_match( $mustMatch, $invocation ) ) && ( $mustNotMatch == '' || !preg_match( $mustNotMatch, $invocation ) ) ) {
						$argChain = $invocation . '|%PAGE%=' . $page . '|%TITLE%=' . $title->getText();
						if ( $catlist != '' ) {
							$argChain .= "|%CATLIST%=$catlist";
						}

						// %ARGS%: the arguments of the call with collapsed braces and '§' instead of '|'
						$rawArgs = substr( $invocation, strlen( $template2 ) + 2 );
						$rawArgs = preg_replace( '/[{]+/', '{', $rawArgs );
						$rawArgs = preg_replace( '/[}]+/', '}', $rawArgs );

						$argChain .= '|%DATE%=' . $date . '|%USER%=' . $user . '|%ARGS%=' . str_replace( '|', '§', $rawArgs ) . '}}';

						$output[++$n] = $parser->preprocess( $argChain, $parser->getTitle(), $parser->getOptions() );
					}
					break;
				}
			} else {
				// if the user wants parameters directly from the call line of template1 we return just those
				// the piece starts inside the call, i.e. behind its opening "{{"
				$cbrackets = 2;
				$templateCall = $tCall;
				$size = strlen( $templateCall );
				$parms = [];
				$parm = '';
				$hasParm = false;

				for ( $i = 0; $i < $size; $i++ ) {
					$c = $templateCall[$i];

					if ( $c == '{' || $c == '[' ) {
						// we count both types of brackets
						$cbrackets++;
					}

					if ( $c == '}' || $c == ']' ) {
						$cbrackets--;
					}

					if ( $cbrackets == 2 && $c == '|' ) {
						$parms[] = trim( $parm );
						$hasParm = true;
						$parm = '';
					} else {
						$parm .= $c;
					}

					if ( $cbrackets != 0 ) {
						continue;
					}

					if ( $hasParm ) {
						// the last parameter still carries the closing "}}"
						$parms[] = trim( substr( $parm, 0, strlen( $parm ) - 2 ) );
					}

					// the first entry is the (empty) text in front of the first separator
					array_splice( $parms, 0, 1 );

					// if we must match a condition: test against it
					$callText = substr( $templateCall, 0, $i - 1 );
					$subject = static function () use ( $parser, $matchParsed, $callText ) {
						return $matchParsed ? $parser->recursiveTagParse( $callText ) : $callText;
					};

					if ( ( $mustMatch == '' || preg_match( $mustMatch, $subject() ) ) && ( $mustNotMatch == '' || !preg_match( $mustNotMatch, $subject() ) ) ) {
						$output[++$n] = '';
						$second = false;

						foreach ( $extractParm as $exParmKey => $exParm ) {
							// a trailing "[n]" limits the length of the column
							$maxlen = -1;
							$limpos = strpos( $exParm, '[' );
							if ( $limpos > 0 && substr( $exParm, -1 ) == ']' ) {
								$maxlen = intval( substr( $exParm, $limpos + 1, strlen( $exParm ) - $limpos - 2 ) );
								$exParm = substr( $exParm, 0, $limpos );
							}

							if ( $second ) {
								// every further column starts on a new line
								if ( substr( $output[$n], -1 ) !== "\n" ) {
									$output[$n] .= "\n";
								}

								$output[$n] .= "|";
							}

							$found = false;

							// % in parameter name
							if ( strpos( $exParm, '%' ) !== false ) {
								// %% is a short form for inclusion of %PAGE% and %TITLE%
								$found = true;
								$output[$n] .= $lister->formatTemplateArg( $exParm, $dplNr, $exParmKey, $firstCall, $maxlen, $article );
							}

							if ( !$found ) {
								// named parameter
								$exParmQuote = str_replace( '/', '\/', $exParm );

								foreach ( $parms as $candidate ) {
									if ( !preg_match( '/^\s*' . $exParmQuote . '\s*=/', $candidate ) ) {
										continue;
									}

									$found = true;
									$output[$n] .= $lister->formatTemplateArg( preg_replace( '/^' . $exParmQuote . '\s*=\s*/', "", $candidate ), $dplNr, $exParmKey, $firstCall, $maxlen, $article );
									break;
								}
							}

							if ( !$found && is_numeric( $exParm ) && intval( $exParm ) == $exParm ) {
								// numeric parameter: count the parameters without a name
								$np = 0;

								foreach ( $parms as $candidate ) {
									if ( strstr( $candidate, '=' ) === false ) {
										++$np;
									}

									if ( $np != $exParm ) {
										continue;
									}

									$found = true;
									$output[$n] .= $lister->formatTemplateArg( $candidate, $dplNr, $exParmKey, $firstCall, $maxlen, $article );
									break;
								}
							}

							if ( !$found ) {
								$output[$n] .= $lister->formatTemplateArg( '', $dplNr, $exParmKey, $firstCall, $maxlen, $article );
							}

							$second = true;
						}
					}
					break;
				}
			}

			$firstCall = false;
		}

		return $output;
	}

	/**
	 * Returns a pattern that matches underscores as well as spaces.
	 *
	 * @param string $pattern
	 * @return string $pattern with every space replaced by the character class [ _]
	 */
	public static function spaceOrUnderscore( $pattern ) {
		return str_replace( ' ', '[ _]', $pattern );
	}
}