Extract stack and state to a new class

Most of this state is used to manage interactions with other state,
and encapsulation allows us to hide data structures and access behind
self-explanatory function names.

The interface is still much wider than I'd like, but it can be improved in
future work.

There is one small behavior change in here: in the `follows` edge case
demonstrated by I3bdf26fd14, we prepend if the splice point cannot be
used because it has a non-numeric key.  I believe this was the original
intention of the logic, and is how the numeric case behaves.  I've verified
that when array_splice throws a warning about non-numeric key, it fails to
add anything to the original array, so the broken follows ref disappeared.

Bug: T237241
Change-Id: I091a0b71ee9aa78e841c2e328018e886a7217715
This commit is contained in:
Adam Wight 2019-11-22 18:28:51 +01:00
parent 10e4a4353d
commit 8453e3ecd7
4 changed files with 850 additions and 315 deletions

View file

@ -31,7 +31,6 @@ use Parser;
use ParserOptions; use ParserOptions;
use ParserOutput; use ParserOutput;
use Sanitizer; use Sanitizer;
use StripState;
class Cite { class Cite {
@ -69,57 +68,6 @@ class Cite {
*/ */
public const BOOK_REF_PROPERTY = 'ref-extends'; public const BOOK_REF_PROPERTY = 'ref-extends';
/**
* Datastructure representing <ref> input, in the format of:
* <code>
* [
* 'user supplied' => [
* 'text' => 'user supplied reference & key',
* 'count' => 1, // occurs twice
* 'number' => 1, // The first reference, we want
* // all occourances of it to
* // use the same number
* ],
* 0 => [
* 'text' => 'Anonymous reference',
* 'count' => -1,
* ],
* 1 => [
* 'text' => 'Another anonymous reference',
* 'count' => -1,
* ],
* 'some key' => [
* 'text' => 'this one occurs once'
* 'count' => 0,
* 'number' => 4
* ],
* 3 => 'more stuff'
* ];
* </code>
*
* This works because:
* * PHP's datastructures are guaranteed to be returned in the
* order that things are inserted into them (unless you mess
* with that)
* * User supplied keys can't be integers, therefore avoiding
* conflict with anonymous keys
*
* @var array[][]
*/
private $mRefs = [];
/**
* Count for user displayed output (ref[1], ref[2], ...)
*
* @var int
*/
private $mOutCnt = 0;
/**
* @var int[]
*/
private $mGroupCnt = [];
/** /**
* The backlinks, in order, to pass as $3 to * The backlinks, in order, to pass as $3 to
* 'cite_references_link_many_format', defined in * 'cite_references_link_many_format', defined in
@ -176,13 +124,9 @@ class Cite {
private $mReferencesErrors = []; private $mReferencesErrors = [];
/** /**
* <ref> call stack * @var ReferenceStack $referenceStack
* Used to cleanup out of sequence ref calls created by #tag
* See description of function rollbackRef.
*
* @var (array|false)[]
*/ */
private $mRefCallStack = []; private $referenceStack;
/** /**
* @var bool * @var bool
@ -199,6 +143,7 @@ class Cite {
$parser->getOptions()->getUserLangObj(), $parser->getOptions()->getUserLangObj(),
$parser $parser
); );
$this->referenceStack = new ReferenceStack( $this->errorReporter );
} }
} }
@ -268,7 +213,7 @@ class Cite {
if ( is_string( $name ) && $name !== '' ) { if ( is_string( $name ) && $name !== '' ) {
$text = null; $text = null;
} else { } else {
$this->mRefCallStack[] = false; $this->referenceStack->pushInvalidRef();
return $this->errorReporter->halfParsed( 'cite_error_ref_no_input' ); return $this->errorReporter->halfParsed( 'cite_error_ref_no_input' );
} }
} }
@ -277,13 +222,13 @@ class Cite {
# Invalid attribute in the tag like <ref no_valid_attr="foo" /> # Invalid attribute in the tag like <ref no_valid_attr="foo" />
# or name and follow attribute used both in one tag checked in # or name and follow attribute used both in one tag checked in
# Cite::refArg that returns false for the name then. # Cite::refArg that returns false for the name then.
$this->mRefCallStack[] = false; $this->referenceStack->pushInvalidRef();
return $this->errorReporter->halfParsed( 'cite_error_ref_too_many_keys' ); return $this->errorReporter->halfParsed( 'cite_error_ref_too_many_keys' );
} }
if ( $text === null && $name === null ) { if ( $text === null && $name === null ) {
# Something like <ref />; this makes no sense. # Something like <ref />; this makes no sense.
$this->mRefCallStack[] = false; $this->referenceStack->pushInvalidRef();
return $this->errorReporter->halfParsed( 'cite_error_ref_no_key' ); return $this->errorReporter->halfParsed( 'cite_error_ref_no_key' );
} }
@ -293,7 +238,7 @@ class Cite {
# would be to mangle them, but it's not really high-priority # would be to mangle them, but it's not really high-priority
# (and would produce weird id's anyway). # (and would produce weird id's anyway).
$this->mRefCallStack[] = false; $this->referenceStack->pushInvalidRef();
return $this->errorReporter->halfParsed( 'cite_error_ref_numeric_key' ); return $this->errorReporter->halfParsed( 'cite_error_ref_numeric_key' );
} }
@ -312,7 +257,7 @@ class Cite {
# of the <ref> tag. This way no part of the article will be eaten # of the <ref> tag. This way no part of the article will be eaten
# even temporarily. # even temporarily.
$this->mRefCallStack[] = false; $this->referenceStack->pushInvalidRef();
return $this->errorReporter->halfParsed( 'cite_error_included_ref' ); return $this->errorReporter->halfParsed( 'cite_error_included_ref' );
} }
@ -323,7 +268,14 @@ class Cite {
# we'll figure that out later. Likewise it's definitely valid # we'll figure that out later. Likewise it's definitely valid
# if there's any content, regardless of name. # if there's any content, regardless of name.
return $this->stack( $text, $name, $group, $follow, $argv, $dir, $parser->getStripState() ); $result = $this->referenceStack->pushRef(
$text, $name, $group, $follow, $argv, $dir, $parser->getStripState() );
if ( $result === null ) {
return '';
} else {
[ $key, $count, $label, $subkey ] = $result;
return $this->linkRef( $group, $key, $count, $label, $subkey );
}
} }
# Not clear how we could get here, but something is probably # Not clear how we could get here, but something is probably
@ -360,29 +312,29 @@ class Cite {
'cite_error_empty_references_define', 'cite_error_empty_references_define',
Sanitizer::safeEncodeAttribute( $name ) Sanitizer::safeEncodeAttribute( $name )
); );
} elseif ( !isset( $this->mRefs[$group] ) && !$isSectionPreview ) { } elseif ( !$this->referenceStack->hasGroup( $group ) && !$isSectionPreview ) {
# Called with group attribute not defined in text. # Called with group attribute not defined in text.
$this->mReferencesErrors[] = $this->errorReporter->halfParsed( $this->mReferencesErrors[] = $this->errorReporter->halfParsed(
'cite_error_references_missing_group', 'cite_error_references_missing_group',
Sanitizer::safeEncodeAttribute( $group ) Sanitizer::safeEncodeAttribute( $group )
); );
} elseif ( !isset( $this->mRefs[$group][$name] ) && !$isSectionPreview ) {
# Called with name attribute not defined in text.
$this->mReferencesErrors[] = $this->errorReporter->halfParsed(
'cite_error_references_missing_key',
Sanitizer::safeEncodeAttribute( $name )
);
} elseif ( isset( $this->mRefs[$group][$name]['text'] ) &&
$this->mRefs[$group][$name]['text'] !== $text
) {
// two refs with same key and different content
// add error message to the original ref
$this->mRefs[$group][$name]['text'] .= ' ' . $this->errorReporter->plain(
'cite_error_references_duplicate_key', $name
);
} else { } else {
# Assign the text to corresponding ref $groupRefs = $this->referenceStack->getGroupRefs( $group );
$this->mRefs[$group][$name]['text'] = $text; if ( !isset( $groupRefs[$name] ) && !$isSectionPreview ) {
# Called with name attribute not defined in text.
$this->mReferencesErrors[] = $this->errorReporter->halfParsed(
'cite_error_references_missing_key',
Sanitizer::safeEncodeAttribute( $name )
);
} elseif ( isset( $groupRefs[$name]['text'] ) && $groupRefs[$name]['text'] !== $text ) {
// two refs with same key and different content
// add error message to the original ref
$text = $groupRefs[$name]['text'] . ' ' .
$this->errorReporter->plain( 'cite_error_references_duplicate_key', $name );
$this->referenceStack->setRefText( $group, $name, $text );
} else {
$this->referenceStack->setRefText( $group, $name, $text );
}
} }
} }
@ -453,177 +405,6 @@ class Cite {
return [ $name, $group, $follow, $dir, $extends ]; return [ $name, $group, $follow, $dir, $extends ];
} }
/**
* Populate $this->mRefs based on input and arguments to <ref>
*
* @param string|null $text Content from the <ref> tag
* @param string|null $name Argument to the <ref> tag as returned by $this->refArg()
* @param string $group
* @param string|null $follow Guaranteed to not be a numeric string
* @param string[] $argv
* @param string $dir ref direction
* @param StripState $stripState
*
* @throws Exception
* @return string
*/
private function stack(
$text, $name, $group, $follow, array $argv, $dir, StripState $stripState
) {
if ( !isset( $this->mRefs[$group] ) ) {
$this->mRefs[$group] = [];
}
if ( !isset( $this->mGroupCnt[$group] ) ) {
$this->mGroupCnt[$group] = 0;
}
if ( $follow ) {
// We know the parent note already, so just perform the "follow" and bail out
if ( isset( $this->mRefs[$group][$follow] ) ) {
$this->mRefs[$group][$follow]['text'] .= ' ' . $text;
return '';
}
// insert part of note at the beginning of the group
$groupsCount = count( $this->mRefs[$group] );
for ( $k = 0; $k < $groupsCount; $k++ ) {
if ( !isset( $this->mRefs[$group][$k]['follow'] ) ) {
break;
}
}
array_splice( $this->mRefs[$group], $k, 0, [ [
'count' => -1,
'text' => $text,
'key' => ++$this->mOutCnt,
'follow' => $follow,
'dir' => $dir,
] ] );
array_splice( $this->mRefCallStack, $k, 0,
[ [ 'new', $argv, $text, $name, $group, $this->mOutCnt ] ] );
// A "follow" never gets it's own footnote marker
return '';
}
if ( $name === null ) {
$this->mRefs[$group][] = [
'count' => -1,
'text' => $text,
'key' => ++$this->mOutCnt,
'dir' => $dir
];
$this->mRefCallStack[] = [ 'new', $argv, $text, $name, $group, $this->mOutCnt ];
return $this->linkRef( $group, $this->mOutCnt );
}
if ( !is_string( $name ) ) {
throw new Exception( 'Invalid stack key: ' . serialize( $name ) );
}
// Valid key with first occurrence
if ( !isset( $this->mRefs[$group][$name] ) ) {
$this->mRefs[$group][$name] = [
'text' => $text,
'count' => -1,
'key' => ++$this->mOutCnt,
'number' => ++$this->mGroupCnt[$group],
'dir' => $dir
];
$action = 'new';
} elseif ( $this->mRefs[$group][$name]['text'] === null && $text !== '' ) {
// If no text was set before, use this text
$this->mRefs[$group][$name]['text'] = $text;
// Use the dir parameter only from the full definition of a named ref tag
$this->mRefs[$group][$name]['dir'] = $dir;
$action = 'assign';
} else {
if ( $text != null && $text !== ''
// T205803 different strip markers might hide the same text
&& $stripState->unstripBoth( $text )
!== $stripState->unstripBoth( $this->mRefs[$group][$name]['text'] )
) {
// two refs with same name and different text
// add error message to the original ref
$this->mRefs[$group][$name]['text'] .= ' ' . $this->errorReporter->plain(
'cite_error_references_duplicate_key', $name
);
}
$action = 'increment';
}
$this->mRefCallStack[] = [ $action, $argv, $text, $name, $group,
$this->mRefs[$group][$name]['key'] ];
return $this->linkRef(
$group,
$name,
$this->mRefs[$group][$name]['key'] . "-" . ++$this->mRefs[$group][$name]['count'],
$this->mRefs[$group][$name]['number'],
"-" . $this->mRefs[$group][$name]['key']
);
}
/**
* Partially undoes the effect of calls to stack()
*
* Called by guardedReferences()
*
* The option to define <ref> within <references> makes the
* behavior of <ref> context dependent. This is normally fine
* but certain operations (especially #tag) lead to out-of-order
* parser evaluation with the <ref> tags being processed before
* their containing <reference> element is read. This leads to
* stack corruption that this function works to fix.
*
* This function is not a total rollback since some internal
* counters remain incremented. Doing so prevents accidentally
* corrupting certain links.
*
* @param string $type
* @param string|null $name The name attribute passed in the ref tag.
* @param string $group
* @param int $index Autoincrement counter for this ref.
*/
private function rollbackRef( $type, $name, $group, $index ) {
if ( !isset( $this->mRefs[$group] ) ) {
return;
}
$key = $name;
if ( $name === null ) {
foreach ( $this->mRefs[$group] as $k => $v ) {
if ( $this->mRefs[$group][$k]['key'] === $index ) {
$key = $k;
break;
}
}
}
// Sanity checks that specified element exists.
if ( $key === null ||
!isset( $this->mRefs[$group][$key] ) ||
$this->mRefs[$group][$key]['key'] !== $index
) {
return;
}
switch ( $type ) {
case 'new':
# Rollback the addition of new elements to the stack.
unset( $this->mRefs[$group][$key] );
if ( $this->mRefs[$group] === [] ) {
unset( $this->mRefs[$group] );
unset( $this->mGroupCnt[$group] );
}
break;
case 'assign':
# Rollback assignment of text to pre-existing elements.
$this->mRefs[$group][$key]['text'] = null;
# continue without break
case 'increment':
# Rollback increase in named ref occurrences.
$this->mRefs[$group][$key]['count']--;
break;
}
}
/** /**
* Callback function for <references> * Callback function for <references>
* *
@ -676,38 +457,18 @@ class Cite {
# conditional parser functions could be created that would # conditional parser functions could be created that would
# lead to malformed references here. # lead to malformed references here.
$count = substr_count( $text, Parser::MARKER_PREFIX . "-ref-" ); $count = substr_count( $text, Parser::MARKER_PREFIX . "-ref-" );
$redoStack = [];
# Undo effects of calling <ref> while unaware of containing <references> # Undo effects of calling <ref> while unaware of containing <references>
for ( $i = 0; $i < $count; $i++ ) { $redoStack = $this->referenceStack->rollbackRefs( $count );
if ( !$this->mRefCallStack ) {
break;
}
$call = array_pop( $this->mRefCallStack );
$redoStack[] = $call;
if ( $call !== false ) {
list( $type, $ref_argv, $ref_text,
$ref_key, $ref_group, $ref_index ) = $call;
$this->rollbackRef( $type, $ref_key, $ref_group, $ref_index );
}
}
# Rerun <ref> call now that mInReferences is set. # Rerun <ref> call now that mInReferences is set.
for ( $i = count( $redoStack ); $i--; ) { foreach ( $redoStack as $call ) {
$call = $redoStack[$i]; [ $ref_argv, $ref_text ] = $call;
if ( $call !== false ) { $this->guardedRef( $ref_text, $ref_argv, $parser );
list( $type, $ref_argv, $ref_text,
$ref_key, $ref_group, $ref_index ) = $call;
$this->guardedRef( $ref_text, $ref_argv, $parser );
}
} }
# Parse $text to process any unparsed <ref> tags. # Parse $text to process any unparsed <ref> tags.
$parser->recursiveTagParse( $text ); $parser->recursiveTagParse( $text );
# Reset call stack
$this->mRefCallStack = [];
} }
if ( isset( $argv['responsive'] ) ) { if ( isset( $argv['responsive'] ) ) {
@ -743,14 +504,15 @@ class Cite {
* @return string HTML ready for output * @return string HTML ready for output
*/ */
private function referencesFormat( $group, $responsive ) { private function referencesFormat( $group, $responsive ) {
if ( !isset( $this->mRefs[$group] ) ) { if ( !$this->referenceStack->hasGroup( $group ) ) {
return ''; return '';
} }
// Add new lines between the list items (ref entries) to avoid confusing tidy (T15073). // Add new lines between the list items (ref entries) to avoid confusing tidy (T15073).
// Note: This builds a string of wikitext, not html. // Note: This builds a string of wikitext, not html.
$parserInput = "\n"; $parserInput = "\n";
foreach ( $this->mRefs[$group] as $key => $value ) { $groupRefs = $this->referenceStack->getGroupRefs( $group );
foreach ( $groupRefs as $key => $value ) {
$parserInput .= $this->referencesFormatEntry( $key, $value ) . "\n"; $parserInput .= $this->referencesFormatEntry( $key, $value ) . "\n";
} }
$parserInput = Html::rawElement( 'ol', [ 'class' => [ 'references' ] ], $parserInput ); $parserInput = Html::rawElement( 'ol', [ 'class' => [ 'references' ] ], $parserInput );
@ -762,7 +524,7 @@ class Cite {
// Use a DIV wrap because column-count on a list directly is broken in Chrome. // Use a DIV wrap because column-count on a list directly is broken in Chrome.
// See https://bugs.chromium.org/p/chromium/issues/detail?id=498730. // See https://bugs.chromium.org/p/chromium/issues/detail?id=498730.
$wrapClasses = [ 'mw-references-wrap' ]; $wrapClasses = [ 'mw-references-wrap' ];
if ( count( $this->mRefs[$group] ) > 10 ) { if ( count( $groupRefs ) > 10 ) {
$wrapClasses[] = 'mw-references-columns'; $wrapClasses[] = 'mw-references-columns';
} }
$ret = Html::rawElement( 'div', [ 'class' => $wrapClasses ], $ret ); $ret = Html::rawElement( 'div', [ 'class' => $wrapClasses ], $ret );
@ -774,8 +536,7 @@ class Cite {
} }
// done, clean up so we can reuse the group // done, clean up so we can reuse the group
unset( $this->mRefs[$group] ); $this->referenceStack->deleteGroup( $group );
unset( $this->mGroupCnt[$group] );
return $ret; return $ret;
} }
@ -783,8 +544,8 @@ class Cite {
/** /**
* Format a single entry for the referencesFormat() function * Format a single entry for the referencesFormat() function
* *
* @param string|int $key The name or group index of the reference * @param string|int $key The key of the reference
* @param array $val A single reference as documented at {@see $mRefs} * @param array $val A single reference as documented at {@see ReferenceStack::$refs}
* @return string Wikitext, wrapped in a single <li> element * @return string Wikitext, wrapped in a single <li> element
*/ */
private function referencesFormatEntry( $key, array $val ) { private function referencesFormatEntry( $key, array $val ) {
@ -982,20 +743,16 @@ class Cite {
* @param string $key The key for the link * @param string $key The key for the link
* @param int|null $count The index of the key, used for distinguishing * @param int|null $count The index of the key, used for distinguishing
* multiple occurrences of the same key * multiple occurrences of the same key
* @param int|null $label The label to use for the link, I want to * @param int $label The label to use for the link, I want to
* use the same label for all occourances of * use the same label for all occurrences of
* the same named reference. * the same named reference.
* @param string $subkey * @param string|null $subkey
* *
* @return string * @return string
*/ */
private function linkRef( $group, $key, $count = null, $label = null, $subkey = '' ) { private function linkRef( $group, $key, $count, $label, $subkey ) {
$contLang = MediaWikiServices::getInstance()->getContentLanguage(); $contLang = MediaWikiServices::getInstance()->getContentLanguage();
if ( $label === null ) {
$label = ++$this->mGroupCnt[$group];
}
return $this->mParser->recursiveTagParse( return $this->mParser->recursiveTagParse(
wfMessage( wfMessage(
'cite_reference_link', 'cite_reference_link',
@ -1091,12 +848,9 @@ class Cite {
// Don't clear when we're in the middle of parsing a <ref> or <references> tag // Don't clear when we're in the middle of parsing a <ref> or <references> tag
return; return;
} }
if ( $this->referenceStack ) {
$this->mGroupCnt = []; $this->referenceStack->clear();
$this->mOutCnt = 0; }
$this->mRefs = [];
$this->mReferencesErrors = [];
$this->mRefCallStack = [];
} }
/** /**
@ -1128,7 +882,9 @@ class Cite {
if ( !$parserOptions->getIsPreview() ) { if ( !$parserOptions->getIsPreview() ) {
// save references data for later use by LinksUpdate hooks // save references data for later use by LinksUpdate hooks
if ( $this->mRefs && isset( $this->mRefs[self::DEFAULT_GROUP] ) ) { if ( $this->referenceStack &&
$this->referenceStack->hasGroup( self::DEFAULT_GROUP )
) {
$this->saveReferencesData( $parserOutput ); $this->saveReferencesData( $parserOutput );
} }
$isSectionPreview = false; $isSectionPreview = false;
@ -1137,20 +893,19 @@ class Cite {
} }
$s = ''; $s = '';
foreach ( $this->mRefs as $group => $refs ) { if ( $this->referenceStack ) {
if ( !$refs ) { foreach ( $this->referenceStack->getGroups() as $group ) {
continue; if ( $group === self::DEFAULT_GROUP || $isSectionPreview ) {
} $this->inReferencesGroup = $group;
if ( $group === self::DEFAULT_GROUP || $isSectionPreview ) { $s .= $this->referencesFormat( $group, $wgCiteResponsiveReferences );
$this->inReferencesGroup = $group; $this->inReferencesGroup = null;
$s .= $this->referencesFormat( $group, $wgCiteResponsiveReferences ); } else {
$this->inReferencesGroup = null; $s .= "\n<br />" .
} else { $this->errorReporter->halfParsed(
$s .= "\n<br />" . 'cite_error_group_refs_without_references',
$this->errorReporter->halfParsed( Sanitizer::safeEncodeAttribute( $group )
'cite_error_group_refs_without_references', );
Sanitizer::safeEncodeAttribute( $group ) }
);
} }
} }
if ( $isSectionPreview && $s !== '' ) { if ( $isSectionPreview && $s !== '' ) {
@ -1171,7 +926,6 @@ class Cite {
/** /**
* Saves references in parser extension data * Saves references in parser extension data
* This is called by each <references/> tag, and by checkRefsNoReferences * This is called by each <references/> tag, and by checkRefsNoReferences
* Assumes $this->mRefs[$group] is set
* *
* @param ParserOutput $parserOutput * @param ParserOutput $parserOutput
* @param string $group * @param string $group
@ -1198,7 +952,7 @@ class Cite {
} }
$n = count( $savedRefs['refs'] ) - 1; $n = count( $savedRefs['refs'] ) - 1;
// save group // save group
$savedRefs['refs'][$n][$group] = $this->mRefs[$group]; $savedRefs['refs'][$n][$group] = $this->referenceStack->getGroupRefs( $group );
$parserOutput->setExtensionData( self::EXT_DATA_KEY, $savedRefs ); $parserOutput->setExtensionData( self::EXT_DATA_KEY, $savedRefs );
} }

366
src/ReferenceStack.php Normal file
View file

@ -0,0 +1,366 @@
<?php
namespace Cite;
use InvalidArgumentException;
use StripState;
/**
 * Encapsulates most of Cite state during parsing.  This includes metadata about each ref tag,
 * and a rollback stack to correct confusion caused by lost context when `{{#tag` is used.
 */
class ReferenceStack {

	/**
	 * Data structure representing <ref> input, keyed first by group and then by ref key,
	 * in the format of:
	 * <code>
	 * [
	 *     'group name' => [
	 *         'user supplied' => [
	 *             'text' => 'user supplied reference & key',
	 *             'count' => 1, // occurs twice
	 *             'number' => 1, // The first reference, we want
	 *                            // all occurrences of it to
	 *                            // use the same number
	 *         ],
	 *         0 => [
	 *             'text' => 'Anonymous reference',
	 *             'count' => -1,
	 *         ],
	 *         1 => [
	 *             'text' => 'Another anonymous reference',
	 *             'count' => -1,
	 *         ],
	 *         'some key' => [
	 *             'text' => 'this one occurs once',
	 *             'count' => 0,
	 *             'number' => 4
	 *         ],
	 *     ],
	 * ];
	 * </code>
	 *
	 * This works because:
	 * * PHP's data structures are guaranteed to be returned in the
	 *   order that things are inserted into them (unless you mess
	 *   with that)
	 * * User supplied keys can't be integers, therefore avoiding
	 *   conflict with anonymous keys
	 *
	 * Every entry created by pushRef() additionally carries a 'key' field, which is
	 * always an autoincrementing integer taken from $refSequence, and a 'dir' field.
	 * Broken "follow" refs also carry a 'follow' field.
	 *
	 * @var array[][]
	 */
	private $refs = [];

	/**
	 * Count for user displayed output (ref[1], ref[2], ...)
	 *
	 * @var int
	 */
	private $refSequence = 0;

	/**
	 * Counter for the number of refs in each group.
	 * @var int[]
	 */
	private $groupRefSequence = [];

	/**
	 * <ref> call stack
	 * Used to cleanup out of sequence ref calls created by #tag
	 * See description of function rollbackRef.
	 *
	 * Each entry is either false (an invalid ref) or a tuple of
	 * [ $action, $argv, $text, $name, $group, $key ].
	 *
	 * @var (array|false)[]
	 */
	private $refCallStack = [];

	/**
	 * @deprecated We should be able to push this responsibility to calling code.
	 * @var CiteErrorReporter $errorReporter
	 */
	private $errorReporter;

	/**
	 * @param CiteErrorReporter $errorReporter
	 */
	public function __construct( CiteErrorReporter $errorReporter ) {
		$this->errorReporter = $errorReporter;
	}

	/**
	 * Leave a mark in the stack which matches an invalid ref tag.
	 */
	public function pushInvalidRef() {
		$this->refCallStack[] = false;
	}

	/**
	 * Populate $this->refs and $this->refCallStack based on input and arguments to <ref>
	 *
	 * @param string|null $text Content from the <ref> tag
	 * @param string|null $name The name argument to the <ref> tag
	 * @param string $group
	 * @param string|null $follow Guaranteed to not be a numeric string
	 * @param string[] $argv
	 * @param string $dir ref direction
	 * @param StripState $stripState
	 *
	 * @return array|null Null when the ref gets no footnote marker of its own (any "follow"),
	 *  otherwise a tuple of [ $key, $count, $label, $subkey ] describing the marker.
	 * @throws InvalidArgumentException If $name is neither null nor a string
	 */
	public function pushRef(
		$text, $name, $group, $follow, array $argv, $dir, StripState $stripState
	) {
		if ( !isset( $this->refs[$group] ) ) {
			$this->refs[$group] = [];
		}
		if ( !isset( $this->groupRefSequence[$group] ) ) {
			$this->groupRefSequence[$group] = 0;
		}

		if ( $follow ) {
			// We know the parent note already, so just perform the "follow" and bail out
			if ( isset( $this->refs[$group][$follow] ) ) {
				$this->refs[$group][$follow]['text'] .= ' ' . $text;
				return null;
			}

			// insert broken follow at the end of any other broken follows.
			// FIXME: This relies on an undocumented feature of array_splice, and produces
			// invalid HTML output, inserting a <p> tag into an <ol>.
			$groupsCount = count( $this->refs[$group] );
			for ( $k = 0; $k < $groupsCount; $k++ ) {
				if ( !isset( $this->refs[$group][$k]['follow'] ) ) {
					break;
				}
			}
			array_splice( $this->refs[$group], $k, 0, [ [
				'count' => -1,
				'text' => $text,
				'key' => ++$this->refSequence,
				'follow' => $follow,
				'dir' => $dir,
			] ] );
			// NOTE(review): the call stack entry is spliced in at the same offset as the
			// ref itself rather than appended; kept as-is to preserve existing behavior.
			array_splice( $this->refCallStack, $k, 0,
				[ [ 'new', $argv, $text, $name, $group, $this->refSequence ] ] );

			// A "follow" never gets its own footnote marker
			return null;
		}

		if ( $name === null ) {
			// Anonymous ref: always a new entry under the next integer key.
			$this->refs[$group][] = [
				'count' => -1,
				'text' => $text,
				'key' => ++$this->refSequence,
				'dir' => $dir
			];
			$this->refCallStack[] = [ 'new', $argv, $text, $name, $group, $this->refSequence ];

			return [ $this->refSequence, null, ++$this->groupRefSequence[$group], null ];
		}

		if ( !is_string( $name ) ) {
			throw new InvalidArgumentException( 'Invalid stack key: ' . serialize( $name ) );
		}

		if ( !isset( $this->refs[$group][$name] ) ) {
			// Valid key with first occurrence
			$this->refs[$group][$name] = [
				'text' => $text,
				'count' => -1,
				'key' => ++$this->refSequence,
				'number' => ++$this->groupRefSequence[$group],
				'dir' => $dir
			];
			$action = 'new';
		} elseif ( $this->refs[$group][$name]['text'] === null && $text !== '' ) {
			// If no text was set before, use this text
			$this->refs[$group][$name]['text'] = $text;
			// Use the dir parameter only from the full definition of a named ref tag
			$this->refs[$group][$name]['dir'] = $dir;
			$action = 'assign';
		} else {
			if ( $text !== null && $text !== ''
				// T205803 different strip markers might hide the same text
				&& $stripState->unstripBoth( $text )
				!== $stripState->unstripBoth( $this->refs[$group][$name]['text'] )
			) {
				// two refs with same name and different text
				// add error message to the original ref
				$this->refs[$group][$name]['text'] .= ' ' . $this->errorReporter->plain(
					'cite_error_references_duplicate_key', $name
				);
			}
			$action = 'increment';
		}

		$this->refCallStack[] = [ $action, $argv, $text, $name, $group,
			$this->refs[$group][$name]['key'] ];

		return [
			$name,
			$this->refs[$group][$name]['key'] . "-" . ++$this->refs[$group][$name]['count'],
			// Entries created through setRefText() have no 'number' yet.
			$this->refs[$group][$name]['number'] ?? ++$this->groupRefSequence[$group],
			"-" . $this->refs[$group][$name]['key']
		];
	}

	/**
	 * Undo the changes made by the last $count ref tags.  This is used when we discover that the
	 * last few tags were actually inside of a references tag.
	 *
	 * Invalid refs (recorded by pushInvalidRef) are consumed from the stack but are not
	 * returned for redo.  Whatever remains on the call stack afterwards is discarded.
	 *
	 * @param int $count
	 * @return array Refs to restore under the correct context, in their original order.
	 *  Each element is [ $argv, $text ].
	 */
	public function rollbackRefs( $count ) : array {
		$redoStack = [];
		for ( $i = 0; $i < $count; $i++ ) {
			if ( !$this->refCallStack ) {
				break;
			}

			$call = array_pop( $this->refCallStack );
			if ( $call !== false ) {
				[ $action, $argv, $text, $name, $group, $index ] = $call;
				$this->rollbackRef( $action, $name, $group, $index );
				$redoStack[] = [ $argv, $text ];
			}
		}

		// Drop unused rollbacks.  TODO: Warn if not fully consumed?
		$this->refCallStack = [];

		// Calls were popped newest-first; callers replay them oldest-first.
		return array_reverse( $redoStack );
	}

	/**
	 * Partially undoes the effect of calls to pushRef()
	 *
	 * Called by rollbackRefs()
	 *
	 * The option to define <ref> within <references> makes the
	 * behavior of <ref> context dependent. This is normally fine
	 * but certain operations (especially #tag) lead to out-of-order
	 * parser evaluation with the <ref> tags being processed before
	 * their containing <references> element is read. This leads to
	 * stack corruption that this function works to fix.
	 *
	 * This function is not a total rollback since some internal
	 * counters remain incremented. Doing so prevents accidentally
	 * corrupting certain links.
	 *
	 * @param string $type One of 'new', 'assign' or 'increment'; anything else is a no-op
	 * @param string|null $name The name attribute passed in the ref tag.
	 * @param string $group
	 * @param int $index Autoincrement counter for this ref.
	 */
	private function rollbackRef( $type, $name, $group, $index ) {
		if ( !$this->hasGroup( $group ) ) {
			return;
		}

		$key = $name;
		if ( $name === null ) {
			// Unnamed refs are only identifiable by their autoincrement 'key'.
			foreach ( $this->refs[$group] as $k => $v ) {
				if ( $v['key'] === $index ) {
					$key = $k;
					break;
				}
			}
		}

		// Sanity checks that specified element exists.
		if ( $key === null ||
			!isset( $this->refs[$group][$key] ) ||
			$this->refs[$group][$key]['key'] !== $index
		) {
			return;
		}

		switch ( $type ) {
			case 'new':
				# Rollback the addition of new elements to the stack.
				unset( $this->refs[$group][$key] );
				if ( $this->refs[$group] === [] ) {
					unset( $this->refs[$group] );
					unset( $this->groupRefSequence[$group] );
				}
				break;
			case 'assign':
				# Rollback assignment of text to pre-existing elements.
				$this->refs[$group][$key]['text'] = null;
				# continue without break
			case 'increment':
				# Rollback increase in named ref occurrences.
				$this->refs[$group][$key]['count']--;
				break;
		}
	}

	/**
	 * Reset all state.
	 *
	 * Only properties declared on this class are reset; no other property is created or
	 * touched here.
	 */
	public function clear() {
		$this->groupRefSequence = [];
		$this->refSequence = 0;
		$this->refs = [];
		$this->refCallStack = [];
	}

	/**
	 * Clear state for a single group.
	 *
	 * @param string $group
	 */
	public function deleteGroup( $group ) {
		unset( $this->refs[$group] );
		unset( $this->groupRefSequence[$group] );
	}

	/**
	 * Returns true if the group exists.
	 *
	 * @param string $group
	 * @return bool
	 */
	public function hasGroup( string $group ) : bool {
		return isset( $this->refs[$group] );
	}

	/**
	 * Returns a list of all groups with references.  Groups that exist but are empty
	 * are left out.
	 *
	 * @return array
	 */
	public function getGroups() : array {
		$groups = [];
		foreach ( $this->refs as $group => $refs ) {
			if ( $refs ) {
				$groups[] = $group;
			}
		}
		return $groups;
	}

	/**
	 * Return all references for a group.
	 *
	 * @param string $group
	 * @return array[] Empty when the group does not exist
	 */
	public function getGroupRefs( $group ) : array {
		return $this->refs[$group] ?? [];
	}

	/**
	 * Interface to set reference text from external code.  Ideally we can take over
	 * responsibility for this logic.
	 * @deprecated
	 *
	 * Creates the group and the ref entry when they do not exist yet.
	 *
	 * @param string $group
	 * @param string $name
	 * @param string $text
	 */
	public function setRefText( $group, $name, $text ) {
		$this->refs[$group][$name]['text'] = $text;
	}

}

View file

@ -433,7 +433,7 @@ It's not possible to follow="…" a <ref> defined in the <references> section
!! end !! end
!! test !! test
A follow="…" after it's parent is not merged A follow="…" before its parent is not merged
!! wikitext !! wikitext
<ref follow="theName">theFollows</ref> <ref follow="theName">theFollows</ref>
<ref name="theName">theValue</ref> <ref name="theName">theValue</ref>
@ -446,6 +446,53 @@ A follow="…" after it's parent is not merged
</ol></div> </ol></div>
!! end !! end
# This is a nasty edge case which was dropping the ref entirely.
!! test
"follow" after a named ref but before its parent
!! wikitext
<ref name="first">First</ref>
<ref follow="third">Second</ref>
<ref name="third">Third</ref>
<hr />
<references />
!! html/php
<p><sup id="cite_ref-first_1-0" class="reference"><a href="#cite_note-first-1">&#91;1&#93;</a></sup>
</p><p><sup id="cite_ref-third_3-0" class="reference"><a href="#cite_note-third-3">&#91;2&#93;</a></sup>
</p>
<hr />
<div class="mw-references-wrap"><ol class="references">
<p id="cite_note-third"><span class="reference-text">Second</span>
</p>
<li id="cite_note-first-1"><span class="mw-cite-backlink"><a href="#cite_ref-first_1-0">↑</a></span> <span class="reference-text">First</span>
</li>
<li id="cite_note-third-3"><span class="mw-cite-backlink"><a href="#cite_ref-third_3-0">↑</a></span> <span class="reference-text">Third</span>
</li>
</ol></div>
!! end
!! test
"follow" after an anonymous ref but before its parent
!! wikitext
<ref>First</ref>
<ref follow="third">Second</ref>
<ref name="third">Third</ref>
<hr />
<references />
!! html/php
<p><sup id="cite_ref-1" class="reference"><a href="#cite_note-1">&#91;1&#93;</a></sup>
</p><p><sup id="cite_ref-third_3-0" class="reference"><a href="#cite_note-third-3">&#91;2&#93;</a></sup>
</p>
<hr />
<div class="mw-references-wrap"><ol class="references">
<p id="cite_note-third"><span class="reference-text">Second</span>
</p>
<li id="cite_note-1"><span class="mw-cite-backlink"><a href="#cite_ref-1">↑</a></span> <span class="reference-text">First</span>
</li>
<li id="cite_note-third-3"><span class="mw-cite-backlink"><a href="#cite_ref-third_3-0">↑</a></span> <span class="reference-text">Third</span>
</li>
</ol></div>
!! end
!! test !! test
Valid follow="…" after it's parent Valid follow="…" after it's parent
!! wikitext !! wikitext

View file

@ -0,0 +1,368 @@
<?php
namespace Cite\Tests\Unit;
use Cite\CiteErrorReporter;
use Cite\ReferenceStack;
use InvalidArgumentException;
use MediaWikiUnitTestCase;
use StripState;
use Wikimedia\TestingAccessWrapper;
/**
 * Unit tests for ReferenceStack. Internal state is inspected through
 * TestingAccessWrapper because the stack exposes no getters for it.
 *
 * @coversDefaultClass \Cite\ReferenceStack
 */
class ReferenceStackTest extends MediaWikiUnitTestCase {

	/**
	 * An invalid ref leaves a bare `false` marker on the call stack.
	 *
	 * @covers ::pushInvalidRef
	 */
	public function testPushInvalidRef() {
		$stack = $this->newStack();

		$stack->pushInvalidRef();

		$spy = TestingAccessWrapper::newFromObject( $stack );
		$this->assertSame( [ false ], $spy->refCallStack );
	}

	// TODO: testRollbackRefs()

	/**
	 * The refs stored for an existing group are handed back untouched.
	 *
	 * @covers ::getGroupRefs
	 */
	public function testGetGroupRefs() {
		$stack = $this->newStack();
		$spy = TestingAccessWrapper::newFromObject( $stack );
		$spy->refs = [ 'present' => [ [ 'ref etc' ] ] ];

		$this->assertSame( [ [ 'ref etc' ] ], $stack->getGroupRefs( 'present' ) );
	}

	/**
	 * Push a sequence of refs and verify each return value as well as the
	 * resulting reference table and call stack.
	 *
	 * @covers ::pushRef
	 *
	 * @dataProvider providePushRef
	 *
	 * @param array[] $refs One [ text, name, group, follow, argv, dir ] tuple per pushRef call
	 * @param array $expectedOutputs Expected pushRef return value for each call, same order
	 * @param array $finalRefs Expected content of the internal `refs` property afterwards
	 * @param array $finalCallStack Expected content of the internal `refCallStack` afterwards
	 */
	public function testPushRefs(
		array $refs,
		array $expectedOutputs,
		array $finalRefs,
		array $finalCallStack
	) {
		$mockStripState = $this->newStripState();
		$stack = $this->newStack();

		foreach ( $refs as $i => $ref ) {
			[ $text, $name, $group, $follow, $argv, $dir ] = $ref;
			$result = $stack->pushRef(
				$text, $name, $group, $follow, $argv, $dir, $mockStripState );
			$this->assertSame( $expectedOutputs[$i], $result );
		}

		$spy = TestingAccessWrapper::newFromObject( $stack );
		$this->assertSame( $finalRefs, $spy->refs );
		$this->assertSame( $finalCallStack, $spy->refCallStack );
	}

	/**
	 * A non-string name is rejected with an exception. Kept apart from the
	 * data-driven test so that expectException() guards exactly one call.
	 *
	 * @covers ::pushRef
	 */
	public function testPushRefIllegalName() {
		$stack = $this->newStack();
		$mockStripState = $this->newStripState();

		$this->expectException( InvalidArgumentException::class );
		$stack->pushRef( null, 123, '', null, [], 'rtl', $mockStripState );
	}

	/**
	 * Cases for testPushRefs(). Every case is
	 * [ refs, expectedOutputs, finalRefs, finalCallStack ].
	 *
	 * @return array[]
	 */
	public static function providePushRef() {
		return [
			'Anonymous ref in default group' => [
				[
					[ null, null, '', null, [], 'rtl' ]
				],
				[
					[ 1, null, 1, null ]
				],
				[
					'' => [
						[
							'count' => -1,
							'text' => null,
							'key' => 1,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], null, null, '', 1 ]
				]
			],
			'Anonymous ref in named group' => [
				[
					[ null, null, 'foo', null, [], 'rtl' ]
				],
				[
					[ 1, null, 1, null ]
				],
				[
					'foo' => [
						[
							'count' => -1,
							'text' => null,
							'key' => 1,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], null, null, 'foo', 1 ]
				]
			],
			'Ref with text' => [
				[
					[ 'text', null, 'foo', null, [], 'rtl' ]
				],
				[
					[ 1, null, 1, null ]
				],
				[
					'foo' => [
						[
							'count' => -1,
							'text' => 'text',
							'key' => 1,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], 'text', null, 'foo', 1 ]
				]
			],
			'Named ref with text' => [
				[
					[ 'text', 'name', 'foo', null, [], 'rtl' ]
				],
				[
					[ 'name', '1-0', 1, '-1' ]
				],
				[
					'foo' => [
						'name' => [
							'text' => 'text',
							'count' => 0,
							'key' => 1,
							'number' => 1,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], 'text', 'name', 'foo', 1 ]
				]
			],
			'Follow after base' => [
				[
					[ 'text-a', 'a', 'foo', null, [], 'rtl' ],
					[ 'text-b', 'b', 'foo', 'a', [], 'rtl' ]
				],
				[
					[ 'a', '1-0', 1, '-1' ],
					null
				],
				[
					'foo' => [
						'a' => [
							'text' => 'text-a text-b',
							'count' => 0,
							'key' => 1,
							'number' => 1,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], 'text-a', 'a', 'foo', 1 ]
				]
			],
			'Follow with no base' => [
				[
					[ 'text', 'b', 'foo', 'a', [], 'rtl' ]
				],
				[
					null
				],
				[
					'foo' => [
						[
							'count' => -1,
							'text' => 'text',
							'key' => 1,
							'follow' => 'a',
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], 'text', 'b', 'foo', 1 ]
				]
			],
			'Follow pointing to later ref' => [
				[
					[ 'text-a', 'a', 'foo', null, [], 'rtl' ],
					[ 'text-b', 'b', 'foo', 'c', [], 'rtl' ],
					[ 'text-c', 'c', 'foo', null, [], 'rtl' ]
				],
				[
					[ 'a', '1-0', 1, '-1' ],
					null,
					[ 'c', '3-0', 2, '-3' ],
				],
				[
					'foo' => [
						0 => [
							'count' => -1,
							'text' => 'text-b',
							'key' => 2,
							'follow' => 'c',
							'dir' => 'rtl',
						],
						'a' => [
							'text' => 'text-a',
							'count' => 0,
							'key' => 1,
							'number' => 1,
							'dir' => 'rtl',
						],
						'c' => [
							'text' => 'text-c',
							'count' => 0,
							'key' => 3,
							'number' => 2,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], 'text-b', 'b', 'foo', 2 ],
					[ 'new', [], 'text-a', 'a', 'foo', 1 ],
					[ 'new', [], 'text-c', 'c', 'foo', 3 ]
				]
			],
			'Repeated ref, text in first tag' => [
				[
					[ 'text', 'a', 'foo', null, [], 'rtl' ],
					[ null, 'a', 'foo', null, [], 'rtl' ]
				],
				[
					[ 'a', '1-0', 1, '-1' ],
					[ 'a', '1-1', 1, '-1' ],
				],
				[
					'foo' => [
						'a' => [
							'text' => 'text',
							'count' => 1,
							'key' => 1,
							'number' => 1,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], 'text', 'a', 'foo', 1 ],
					[ 'increment', [], null, 'a', 'foo', 1 ]
				]
			],
			'Repeated ref, text in second tag' => [
				[
					[ null, 'a', 'foo', null, [], 'rtl' ],
					[ 'text', 'a', 'foo', null, [], 'rtl' ]
				],
				[
					[ 'a', '1-0', 1, '-1' ],
					[ 'a', '1-1', 1, '-1' ],
				],
				[
					'foo' => [
						'a' => [
							'text' => 'text',
							'count' => 1,
							'key' => 1,
							'number' => 1,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], null, 'a', 'foo', 1 ],
					[ 'assign', [], 'text', 'a', 'foo', 1 ]
				]
			],
			'Repeated ref, mismatched text' => [
				[
					[ 'text-1', 'a', 'foo', null, [], 'rtl' ],
					[ 'text-2', 'a', 'foo', null, [], 'rtl' ]
				],
				[
					[ 'a', '1-0', 1, '-1' ],
					[ 'a', '1-1', 1, '-1' ],
				],
				[
					'foo' => [
						'a' => [
							'text' => 'text-1 cite_error_references_duplicate_key',
							'count' => 1,
							'key' => 1,
							'number' => 1,
							'dir' => 'rtl',
						]
					]
				],
				[
					[ 'new', [], 'text-1', 'a', 'foo', 1 ],
					[ 'increment', [], 'text-2', 'a', 'foo', 1 ]
				]
			],
		];
	}

	/**
	 * Groups with an empty ref list are not reported.
	 *
	 * @covers ::getGroups
	 */
	public function testGetGroups() {
		$stack = $this->newStack();
		$spy = TestingAccessWrapper::newFromObject( $stack );
		$spy->refs = [ 'havenot' => [], 'have' => [ [ 'ref etc' ] ] ];

		$this->assertSame( [ 'have' ], $stack->getGroups() );
	}

	/**
	 * @covers ::hasGroup
	 */
	public function testHasGroup() {
		$stack = $this->newStack();
		$spy = TestingAccessWrapper::newFromObject( $stack );
		$spy->refs = [ 'present' => [ [ 'ref etc' ] ] ];

		$this->assertFalse( $stack->hasGroup( 'absent' ) );
		$this->assertTrue( $stack->hasGroup( 'present' ) );
	}

	/**
	 * Setting text on an unknown group/name creates the entry on the fly.
	 *
	 * @covers ::setRefText
	 */
	public function testSetRefText() {
		$stack = $this->newStack();

		$stack->setRefText( 'group', 'name', 'the-text' );

		$spy = TestingAccessWrapper::newFromObject( $stack );
		$this->assertSame(
			[ 'group' => [ 'name' => [ 'text' => 'the-text' ] ] ],
			$spy->refs
		);
	}

	/**
	 * Build a stack whose error reporter echoes back its first argument, so
	 * error message keys show up verbatim in stored text.
	 *
	 * @return ReferenceStack
	 */
	private function newStack() {
		$errorReporter = $this->createMock( CiteErrorReporter::class );
		$errorReporter->method( 'plain' )->willReturnArgument( 0 );

		return new ReferenceStack( $errorReporter );
	}

	/**
	 * Build a StripState double whose unstripBoth() returns its input unchanged.
	 *
	 * @return StripState
	 */
	private function newStripState() {
		$mockStripState = $this->createMock( StripState::class );
		$mockStripState->method( 'unstripBoth' )->willReturnArgument( 0 );

		return $mockStripState;
	}
}