Extract validation to a separate class

This removes almost 200 lines from the main class.

This patch intentionally doesn't make any changes to the code but
only moves it around. Further improvements are for later patches.

Bug: T353269
Change-Id: Ic73f1b7458b3f7b7b89806a88a1111161e3cf094
This commit is contained in:
thiemowmde 2023-12-12 11:15:14 +01:00 committed by Thiemo Kreuz (WMDE)
parent 428494cd82
commit a6a0f66130
4 changed files with 175 additions and 152 deletions

View file

@ -26,6 +26,7 @@ namespace Cite;
use LogicException;
use MediaWiki\Html\Html;
use MediaWiki\MediaWikiServices;
use MediaWiki\Parser\Sanitizer;
use Parser;
use StatusValue;
@ -108,139 +109,6 @@ class Cite {
return $ret;
}
private function validateRef(
?string $text,
string $group,
?string $name,
?string $extends,
?string $follow,
?string $dir
): StatusValue {
if ( ctype_digit( (string)$name )
|| ctype_digit( (string)$extends )
|| ctype_digit( (string)$follow )
) {
// Numeric names mess up the resulting id's, potentially producing
// duplicate id's in the XHTML. The Right Thing To Do
// would be to mangle them, but it's not really high-priority
// (and would produce weird id's anyway).
return StatusValue::newFatal( 'cite_error_ref_numeric_key' );
}
if ( $extends ) {
// Temporary feature flag until mainstreamed, see T236255
global $wgCiteBookReferencing;
if ( !$wgCiteBookReferencing ) {
return StatusValue::newFatal( 'cite_error_ref_too_many_keys' );
}
$groupRefs = $this->referenceStack->getGroupRefs( $group );
if ( isset( $groupRefs[$name] ) && !isset( $groupRefs[$name]['extends'] ) ) {
// T242141: A top-level <ref> can't be changed into a sub-reference
return StatusValue::newFatal( 'cite_error_references_duplicate_key', $name );
} elseif ( isset( $groupRefs[$extends]['extends'] ) ) {
// A sub-reference can not be extended a second time (no nesting)
return StatusValue::newFatal( 'cite_error_ref_nested_extends', $extends,
$groupRefs[$extends]['extends'] );
}
}
if ( $follow && ( $name || $extends ) ) {
// TODO: Introduce a specific error for this case.
return StatusValue::newFatal( 'cite_error_ref_too_many_keys' );
}
if ( $dir !== null && !in_array( strtolower( $dir ), [ 'ltr', 'rtl' ], true ) ) {
return StatusValue::newFatal( 'cite_error_ref_invalid_dir', $dir );
}
return $this->inReferencesGroup === null ?
$this->validateRefOutsideOfReferences( $text, $name ) :
$this->validateRefInReferences( $text, $group, $name );
}
private function validateRefOutsideOfReferences(
?string $text,
?string $name
): StatusValue {
if ( !$name ) {
if ( $text === null ) {
// Completely empty ref like <ref /> is forbidden.
return StatusValue::newFatal( 'cite_error_ref_no_key' );
} elseif ( trim( $text ) === '' ) {
// Must have content or reuse another ref by name.
return StatusValue::newFatal( 'cite_error_ref_no_input' );
}
}
if ( $text !== null && preg_match(
'/<ref(erences)?\b[^>]*+>/i',
preg_replace( '#<(\w++)[^>]*+>.*?</\1\s*>|<!--.*?-->#s', '', $text )
) ) {
// (bug T8199) This most likely implies that someone left off the
// closing </ref> tag, which will cause the entire article to be
// eaten up until the next <ref>. So we bail out early instead.
// The fancy regex above first tries chopping out anything that
// looks like a comment or SGML tag, which is a crude way to avoid
// false alarms for <nowiki>, <pre>, etc.
//
// Possible improvement: print the warning, followed by the contents
// of the <ref> tag. This way no part of the article will be eaten
// even temporarily.
return StatusValue::newFatal( 'cite_error_included_ref' );
}
return StatusValue::newGood();
}
private function validateRefInReferences(
?string $text,
string $group,
?string $name
): StatusValue {
if ( $group !== $this->inReferencesGroup ) {
// <ref> and <references> have conflicting group attributes.
return StatusValue::newFatal( 'cite_error_references_group_mismatch',
Sanitizer::safeEncodeAttribute( $group ) );
}
if ( !$name ) {
// <ref> calls inside <references> must be named
return StatusValue::newFatal( 'cite_error_references_no_key' );
}
if ( $text === null || trim( $text ) === '' ) {
// <ref> called in <references> has no content.
return StatusValue::newFatal(
'cite_error_empty_references_define',
Sanitizer::safeEncodeAttribute( $name ),
Sanitizer::safeEncodeAttribute( $group )
);
}
// Section previews are exempt from some rules.
if ( !$this->isSectionPreview ) {
if ( !$this->referenceStack->hasGroup( $group ) ) {
// Called with group attribute not defined in text.
return StatusValue::newFatal(
'cite_error_references_missing_group',
Sanitizer::safeEncodeAttribute( $group ),
Sanitizer::safeEncodeAttribute( $name )
);
}
$groupRefs = $this->referenceStack->getGroupRefs( $group );
if ( !isset( $groupRefs[$name] ) ) {
// No such named ref exists in this group.
return StatusValue::newFatal( 'cite_error_references_missing_key',
Sanitizer::safeEncodeAttribute( $name ) );
}
}
return StatusValue::newGood();
}
/**
* @param Parser $parser
* @param ?string $text Raw, untrimmed wikitext content of the <ref> tag, if any
@ -267,8 +135,14 @@ class Cite {
// Use the default group, or the references group when inside one.
$arguments['group'] ??= $this->inReferencesGroup ?? self::DEFAULT_GROUP;
$validator = new Validator(
$this->referenceStack,
$this->inReferencesGroup,
$this->isSectionPreview,
MediaWikiServices::getInstance()->getMainConfig()->get( 'CiteBookReferencing' )
);
// @phan-suppress-next-line PhanParamTooFewUnpack No good way to document it.
$status->merge( $this->validateRef( $text, ...array_values( $arguments ) ) );
$status->merge( $validator->validateRef( $text, ...array_values( $arguments ) ) );
if ( !$status->isGood() && $this->inReferencesGroup !== null ) {
foreach ( $status->getErrors() as $error ) {

View file

@ -19,7 +19,7 @@ class ReferenceStack {
* string for the default group) and reference name.
*
* References without a name get a numeric index, starting from 0. Conflicts are avoided by
* disallowing numeric names (e.g. <ref name="1">) in {@see Cite::validateRef}.
* disallowing numeric names (e.g. <ref name="1">) in {@see Validator::validateRef}.
*
* Elements (almost all are optional):
* - 'name': The original name="" of a reference (also used as the array key), or null for

159
src/Validator.php Normal file
View file

@ -0,0 +1,159 @@
<?php
namespace Cite;
use MediaWiki\Parser\Sanitizer;
use StatusValue;
class Validator {
private ReferenceStack $referenceStack;
private ?string $inReferencesGroup;
private bool $isSectionPreview;
private bool $bookReferencing;
public function __construct(
ReferenceStack $referenceStack,
?string $inReferencesGroup = null,
bool $isSectionPreview = false,
bool $bookReferencing = false
) {
$this->referenceStack = $referenceStack;
$this->inReferencesGroup = $inReferencesGroup;
$this->isSectionPreview = $isSectionPreview;
$this->bookReferencing = $bookReferencing;
}
public function validateRef(
?string $text,
string $group,
?string $name,
?string $extends,
?string $follow,
?string $dir
): StatusValue {
if ( ctype_digit( (string)$name )
|| ctype_digit( (string)$extends )
|| ctype_digit( (string)$follow )
) {
// Numeric names mess up the resulting id's, potentially producing
// duplicate id's in the XHTML. The Right Thing To Do
// would be to mangle them, but it's not really high-priority
// (and would produce weird id's anyway).
return StatusValue::newFatal( 'cite_error_ref_numeric_key' );
}
if ( $extends ) {
// Temporary feature flag until mainstreamed, see T236255
if ( !$this->bookReferencing ) {
return StatusValue::newFatal( 'cite_error_ref_too_many_keys' );
}
$groupRefs = $this->referenceStack->getGroupRefs( $group );
if ( isset( $groupRefs[$name] ) && !isset( $groupRefs[$name]['extends'] ) ) {
// T242141: A top-level <ref> can't be changed into a sub-reference
return StatusValue::newFatal( 'cite_error_references_duplicate_key', $name );
} elseif ( isset( $groupRefs[$extends]['extends'] ) ) {
// A sub-reference can not be extended a second time (no nesting)
return StatusValue::newFatal( 'cite_error_ref_nested_extends', $extends,
$groupRefs[$extends]['extends'] );
}
}
if ( $follow && ( $name || $extends ) ) {
// TODO: Introduce a specific error for this case.
return StatusValue::newFatal( 'cite_error_ref_too_many_keys' );
}
if ( $dir !== null && !in_array( strtolower( $dir ), [ 'ltr', 'rtl' ], true ) ) {
return StatusValue::newFatal( 'cite_error_ref_invalid_dir', $dir );
}
return $this->inReferencesGroup === null ?
$this->validateRefOutsideOfReferences( $text, $name ) :
$this->validateRefInReferences( $text, $group, $name );
}
private function validateRefOutsideOfReferences(
?string $text,
?string $name
): StatusValue {
if ( !$name ) {
if ( $text === null ) {
// Completely empty ref like <ref /> is forbidden.
return StatusValue::newFatal( 'cite_error_ref_no_key' );
} elseif ( trim( $text ) === '' ) {
// Must have content or reuse another ref by name.
return StatusValue::newFatal( 'cite_error_ref_no_input' );
}
}
if ( $text !== null && preg_match(
'/<ref(erences)?\b[^>]*+>/i',
preg_replace( '#<(\w++)[^>]*+>.*?</\1\s*>|<!--.*?-->#s', '', $text )
) ) {
// (bug T8199) This most likely implies that someone left off the
// closing </ref> tag, which will cause the entire article to be
// eaten up until the next <ref>. So we bail out early instead.
// The fancy regex above first tries chopping out anything that
// looks like a comment or SGML tag, which is a crude way to avoid
// false alarms for <nowiki>, <pre>, etc.
//
// Possible improvement: print the warning, followed by the contents
// of the <ref> tag. This way no part of the article will be eaten
// even temporarily.
return StatusValue::newFatal( 'cite_error_included_ref' );
}
return StatusValue::newGood();
}
private function validateRefInReferences(
?string $text,
string $group,
?string $name
): StatusValue {
if ( $group !== $this->inReferencesGroup ) {
// <ref> and <references> have conflicting group attributes.
return StatusValue::newFatal( 'cite_error_references_group_mismatch',
Sanitizer::safeEncodeAttribute( $group ) );
}
if ( !$name ) {
// <ref> calls inside <references> must be named
return StatusValue::newFatal( 'cite_error_references_no_key' );
}
if ( $text === null || trim( $text ) === '' ) {
// <ref> called in <references> has no content.
return StatusValue::newFatal(
'cite_error_empty_references_define',
Sanitizer::safeEncodeAttribute( $name ),
Sanitizer::safeEncodeAttribute( $group )
);
}
// Section previews are exempt from some rules.
if ( !$this->isSectionPreview ) {
if ( !$this->referenceStack->hasGroup( $group ) ) {
// Called with group attribute not defined in text.
return StatusValue::newFatal(
'cite_error_references_missing_group',
Sanitizer::safeEncodeAttribute( $group ),
Sanitizer::safeEncodeAttribute( $name )
);
}
$groupRefs = $this->referenceStack->getGroupRefs( $group );
if ( !isset( $groupRefs[$name] ) ) {
// No such named ref exists in this group.
return StatusValue::newFatal( 'cite_error_references_missing_key',
Sanitizer::safeEncodeAttribute( $name ) );
}
}
return StatusValue::newGood();
}
}

View file

@ -7,6 +7,7 @@ use Cite\ErrorReporter;
use Cite\FootnoteMarkFormatter;
use Cite\ReferencesFormatter;
use Cite\ReferenceStack;
use Cite\Validator;
use Language;
use LogicException;
use Parser;
@ -23,9 +24,7 @@ use Wikimedia\TestingAccessWrapper;
class CiteTest extends \MediaWikiIntegrationTestCase {
/**
* @covers ::validateRef
* @covers ::validateRefOutsideOfReferences
* @covers ::validateRefInReferences
* @covers \Cite\Validator
* @dataProvider provideValidateRef
*/
public function testValidateRef(
@ -40,19 +39,13 @@ class CiteTest extends \MediaWikiIntegrationTestCase {
?string $dir,
?string $expected
) {
$this->overrideConfigValue( 'CiteBookReferencing', true );
$errorReporter = $this->createMock( ErrorReporter::class );
$stack = new ReferenceStack( $errorReporter );
TestingAccessWrapper::newFromObject( $stack )->refs = $referencesStack;
/** @var Cite $cite */
$cite = TestingAccessWrapper::newFromObject( $this->newCite() );
$cite->referenceStack = $stack;
$cite->inReferencesGroup = $inReferencesGroup;
$cite->isSectionPreview = $isSectionPreview;
$validator = new Validator( $stack, $inReferencesGroup, $isSectionPreview, true );
$status = $cite->validateRef( $text, $group, $name, $extends, $follow, $dir );
$status = $validator->validateRef( $text, $group, $name, $extends, $follow, $dir );
if ( $expected ) {
$this->assertStatusError( $expected, $status );
} else {
@ -298,14 +291,11 @@ class CiteTest extends \MediaWikiIntegrationTestCase {
}
/**
* @covers ::validateRef
* @covers \Cite\Validator
*/
public function testValidateRef_noExtends() {
$this->overrideConfigValue( 'CiteBookReferencing', false );
/** @var Cite $cite */
$cite = TestingAccessWrapper::newFromObject( $this->newCite() );
$status = $cite->validateRef( 'text', '', 'name', 'a', null, null );
$validator = new Validator( $this->createNoOpMock( ReferenceStack::class ) );
$status = $validator->validateRef( 'text', '', 'name', 'a', null, null );
$this->assertStatusError( 'cite_error_ref_too_many_keys', $status );
}