<?php

namespace MediaWiki\Extension\AbuseFilter\Variables;

use ContentHandler;
use Language;
use MediaWiki\Content\TextContent;
use MediaWiki\Extension\AbuseFilter\Hooks\AbuseFilterHookRunner;
use MediaWiki\Extension\AbuseFilter\Parser\AFPData;
use MediaWiki\Extension\AbuseFilter\TextExtractor;
use MediaWiki\ExternalLinks\ExternalLinksLookup;
use MediaWiki\ExternalLinks\LinkFilter;
use MediaWiki\Permissions\PermissionManager;
use MediaWiki\Permissions\RestrictionStore;
use MediaWiki\Revision\RevisionLookup;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionStore;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\Storage\PreparedUpdate;
use MediaWiki\Title\Title;
use MediaWiki\User\ExternalUserNames;
use MediaWiki\User\User;
use MediaWiki\User\UserEditTracker;
use MediaWiki\User\UserGroupManager;
use MediaWiki\User\UserIdentity;
use MediaWiki\User\UserIdentityUtils;
use ParserFactory;
use ParserOptions;
use Psr\Log\LoggerInterface;
use stdClass;
use StringUtils;
use UnexpectedValueException;
use WANObjectCache;
use Wikimedia\Diff\Diff;
use Wikimedia\Diff\UnifiedDiffFormatter;
use Wikimedia\IPUtils;
use Wikimedia\Rdbms\Database;
use Wikimedia\Rdbms\LBFactory;
use Wikimedia\Rdbms\SelectQueryBuilder;
use WikiPage;

/**
 * Service used to compute lazy-loaded variables.
 *
 * A LazyLoadedVariable carries a "method" name and a parameter array; compute()
 * dispatches on that method name and returns the computed value wrapped in AFPData.
 *
 * @internal
 */
class LazyVariableComputer {
	public const SERVICE_NAME = 'AbuseFilterLazyVariableComputer';

	/**
	 * @var float The amount of time to subtract from profiling
	 * @todo This is a hack
	 */
	public static $profilingExtraTime = 0;

	/** @var TextExtractor */
	private $textExtractor;

	/** @var AbuseFilterHookRunner */
	private $hookRunner;

	/** @var LoggerInterface */
	private $logger;

	/** @var LBFactory */
	private $lbFactory;

	/** @var WANObjectCache */
	private $wanCache;

	/** @var RevisionLookup */
	private $revisionLookup;

	/** @var RevisionStore */
	private $revisionStore;

	/** @var Language */
	private $contentLanguage;

	/** @var ParserFactory */
	private $parserFactory;

	/** @var UserEditTracker */
	private $userEditTracker;

	/** @var UserGroupManager */
	private $userGroupManager;

	/** @var PermissionManager */
	private $permissionManager;

	/** @var RestrictionStore */
	private $restrictionStore;

	/** @var UserIdentityUtils */
	private $userIdentityUtils;

	/** @var string */
	private $wikiID;

	/**
	 * @param TextExtractor $textExtractor
	 * @param AbuseFilterHookRunner $hookRunner
	 * @param LoggerInterface $logger
	 * @param LBFactory $lbFactory
	 * @param WANObjectCache $wanCache
	 * @param RevisionLookup $revisionLookup
	 * @param RevisionStore $revisionStore
	 * @param Language $contentLanguage
	 * @param ParserFactory $parserFactory
	 * @param UserEditTracker $userEditTracker
	 * @param UserGroupManager $userGroupManager
	 * @param PermissionManager $permissionManager
	 * @param RestrictionStore $restrictionStore
	 * @param UserIdentityUtils $userIdentityUtils
	 * @param string $wikiID
	 */
	public function __construct(
		TextExtractor $textExtractor,
		AbuseFilterHookRunner $hookRunner,
		LoggerInterface $logger,
		LBFactory $lbFactory,
		WANObjectCache $wanCache,
		RevisionLookup $revisionLookup,
		RevisionStore $revisionStore,
		Language $contentLanguage,
		ParserFactory $parserFactory,
		UserEditTracker $userEditTracker,
		UserGroupManager $userGroupManager,
		PermissionManager $permissionManager,
		RestrictionStore $restrictionStore,
		UserIdentityUtils $userIdentityUtils,
		string $wikiID
	) {
		$this->textExtractor = $textExtractor;
		$this->hookRunner = $hookRunner;
		$this->logger = $logger;
		$this->lbFactory = $lbFactory;
		$this->wanCache = $wanCache;
		$this->revisionLookup = $revisionLookup;
		$this->revisionStore = $revisionStore;
		$this->contentLanguage = $contentLanguage;
		$this->parserFactory = $parserFactory;
		$this->userEditTracker = $userEditTracker;
		$this->userGroupManager = $userGroupManager;
		$this->permissionManager = $permissionManager;
		$this->restrictionStore = $restrictionStore;
		$this->userIdentityUtils = $userIdentityUtils;
		$this->wikiID = $wikiID;
	}

	/**
	 * Compute the value of a lazy-loaded variable.
	 *
	 * The AbuseFilter-interceptVariable hook runs first; if it returns false, whatever it
	 * stored in $result is returned immediately. Otherwise the variable's method name selects
	 * one of the built-in computations below, and unknown method names are handed to the
	 * AbuseFilter-computeVariable hook.
	 *
	 * XXX: $getVarCB is a hack to hide the cyclic dependency with VariablesManager. See T261069 for possible
	 * solutions. This might also be merged into VariablesManager, but it would bring a ton of dependencies.
	 * @todo Should we remove $vars parameter (check hooks)?
	 *
	 * @param LazyLoadedVariable $var
	 * @param VariableHolder $vars Only forwarded to the hooks
	 * @param callable $getVarCB Resolves another variable by name
	 * @phan-param callable(string $name):AFPData $getVarCB
	 * @return AFPData
	 * @throws UnexpectedValueException If the method is unknown and the computeVariable hook
	 *   did not handle it either
	 */
	public function compute( LazyLoadedVariable $var, VariableHolder $vars, callable $getVarCB ) {
		$parameters = $var->getParameters();
		$varMethod = $var->getMethod();
		$result = null;

		if ( !$this->hookRunner->onAbuseFilter_interceptVariable(
			$varMethod,
			$vars,
			$parameters,
			$result
		) ) {
			return $result instanceof AFPData
				? $result : AFPData::newFromPHPVar( $result );
		}

		switch ( $varMethod ) {
			case 'diff':
				$text1Var = $parameters['oldtext-var'];
				$text2Var = $parameters['newtext-var'];
				$text1 = $getVarCB( $text1Var )->toString();
				$text2 = $getVarCB( $text2Var )->toString();
				// T74329: if there's no text, don't return an array with the empty string
				$text1 = $text1 === '' ? [] : explode( "\n", $text1 );
				$text2 = $text2 === '' ? [] : explode( "\n", $text2 );
				$diffs = new Diff( $text1, $text2 );
				$format = new UnifiedDiffFormatter();
				$result = $format->format( $diffs );
				break;
			case 'diff-split':
				$diff = $getVarCB( $parameters['diff-var'] )->toString();
				$line_prefix = $parameters['line-prefix'];
				$diff_lines = explode( "\n", $diff );
				$result = [];
				foreach ( $diff_lines as $line ) {
					// Keep only the lines starting with the requested prefix, without the prefix itself
					if ( ( $line[0] ?? '' ) === $line_prefix ) {
						$result[] = substr( $line, 1 );
					}
				}
				break;
			case 'array-diff':
				$baseVar = $parameters['base-var'];
				$minusVar = $parameters['minus-var'];

				$baseArray = $getVarCB( $baseVar )->toNative();
				$minusArray = $getVarCB( $minusVar )->toNative();

				$result = array_diff( $baseArray, $minusArray );
				break;
			case 'links-from-wikitext':
				// This should ONLY be used when sharing a parse operation with the edit.

				/** @var WikiPage $article */
				$article = $parameters['article'];
				if ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
					// Shared with the edit, don't count it in profiling
					$startTime = microtime( true );
					$textVar = $parameters['text-var'];

					$new_text = $getVarCB( $textVar )->toString();
					$content = ContentHandler::makeContent( $new_text, $article->getTitle() );
					$editInfo = $article->prepareContentForEdit(
						$content,
						null,
						$parameters['contextUserIdentity']
					);
					$result = LinkFilter::getIndexedUrlsNonReversed(
						array_keys( $editInfo->output->getExternalLinks() )
					);
					self::$profilingExtraTime += ( microtime( true ) - $startTime );
					break;
				}
				// Otherwise fall back to database (intentional fall-through; $article stays set)
			case 'links-from-wikitext-or-database':
				// TODO: use Content object instead, if available!
				/** @var WikiPage $article */
				$article ??= $parameters['article'];

				// this inference is ugly, but the name isn't accessible from here
				// and we only want this for debugging
				$textVar = $parameters['text-var'];
				$varName = str_starts_with( $textVar, 'old_' ) ? 'old_links' : 'all_links';
				if ( $parameters['forFilter'] ?? false ) {
					$this->logger->debug( "Loading $varName from DB" );
					$links = $this->getLinksFromDB( $article );
				} elseif ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
					$this->logger->debug( "Loading $varName from Parser" );

					$wikitext = $getVarCB( $textVar )->toString();
					$editInfo = $this->parseNonEditWikitext(
						$wikitext,
						$article,
						$parameters['contextUserIdentity']
					);
					$links = LinkFilter::getIndexedUrlsNonReversed(
						array_keys( $editInfo->output->getExternalLinks() )
					);
				} else {
					// TODO: Get links from Content object. But we don't have the content object.
					// And for non-text content, $wikitext is usually not going to be a valid
					// serialization, but rather some dummy text for filtering.
					$links = [];
				}

				$result = $links;
				break;
			case 'links-from-update':
				/** @var PreparedUpdate $update */
				$update = $parameters['update'];
				// Shared with the edit, don't count it in profiling
				$startTime = microtime( true );
				$result = LinkFilter::getIndexedUrlsNonReversed(
					array_keys( $update->getParserOutputForMetaData()->getExternalLinks() )
				);
				self::$profilingExtraTime += ( microtime( true ) - $startTime );
				break;
			case 'links-from-database':
				/** @var WikiPage $article */
				$article = $parameters['article'];
				$this->logger->debug( 'Loading old_links from DB' );
				$result = $this->getLinksFromDB( $article );
				break;
			case 'parse-wikitext':
				// Should ONLY be used when sharing a parse operation with the edit.
				// TODO: use Content object instead, if available!
				/** @var WikiPage $article */
				$article = $parameters['article'];
				if ( $article->getContentModel() === CONTENT_MODEL_WIKITEXT ) {
					// Shared with the edit, don't count it in profiling
					$startTime = microtime( true );
					$textVar = $parameters['wikitext-var'];

					$new_text = $getVarCB( $textVar )->toString();
					$content = ContentHandler::makeContent( $new_text, $article->getTitle() );
					$editInfo = $article->prepareContentForEdit(
						$content,
						null,
						$parameters['contextUserIdentity']
					);
					if ( $parameters['pst'] ?? false ) {
						$result = $editInfo->pstContent->serialize( $editInfo->format );
					} else {
						// Note: as of core change r727361, the PP limit comments (which we don't want to be here)
						// are already excluded.
						$result = $editInfo->getOutput()->getText();
					}
					self::$profilingExtraTime += ( microtime( true ) - $startTime );
				} else {
					$result = '';
				}
				break;
			case 'html-from-update':
				/** @var PreparedUpdate $update */
				$update = $parameters['update'];
				// Shared with the edit, don't count it in profiling
				$startTime = microtime( true );
				$result = $update->getCanonicalParserOutput()->getText();
				self::$profilingExtraTime += ( microtime( true ) - $startTime );
				break;
			case 'strip-html':
				$htmlVar = $parameters['html-var'];
				$html = $getVarCB( $htmlVar )->toString();
				$stripped = StringUtils::delimiterReplace( '<', '>', '', $html );
				// We strip extra spaces to the right because the stripping above
				// could leave a lot of whitespace.
				// @fixme Find a better way to do this.
				$result = TextContent::normalizeLineEndings( $stripped );
				break;
			case 'load-recent-authors':
				$result = $this->getLastPageAuthors( $parameters['title'] );
				break;
			case 'load-first-author':
				$revision = $this->revisionLookup->getFirstRevision( $parameters['title'] );
				if ( $revision ) {
					// TODO T233241
					$user = $revision->getUser();
					$result = $user === null ? '' : $user->getName();
				} else {
					$result = '';
				}
				break;
			case 'get-page-restrictions':
				$action = $parameters['action'];
				/** @var Title $title */
				$title = $parameters['title'];
				$result = $this->restrictionStore->getRestrictions( $title, $action );
				break;
			case 'user-unnamed-ip':
				$user = $parameters['user'];
				$result = null;

				// Don't return an IP for past events (eg. revisions, logs)
				// This could leak IPs to users who don't have IP viewing rights
				if ( !$parameters['rc'] &&
					// Reveal IPs for:
					// - temporary accounts: temporary account names will replace the IP in the `user_name`
					//   variable. This variable restores this access.
					// - logged-out users: This supports the transition to the use of temporary accounts
					//   so that filter maintainers on pre-transition wikis can migrate `user_name` to `user_unnamed_ip`
					//   where necessary and see no disruption on transition.
					//
					// This variable should only ever be exposed for these use cases and shouldn't be extended
					// to registered accounts, as that would leak account PII to users without the right to see
					// that information
					( $this->userIdentityUtils->isTemp( $user ) || IPUtils::isIPAddress( $user->getName() ) ) ) {
					$result = $user->getRequest()->getIP();
				}
				break;
			case 'user-type':
				/** @var UserIdentity $userIdentity */
				$userIdentity = $parameters['user-identity'];
				if ( $this->userIdentityUtils->isNamed( $userIdentity ) ) {
					$result = 'named';
				} elseif ( $this->userIdentityUtils->isTemp( $userIdentity ) ) {
					$result = 'temp';
				} elseif ( IPUtils::isIPAddress( $userIdentity->getName() ) ) {
					$result = 'ip';
				} elseif ( ExternalUserNames::isExternal( $userIdentity->getName() ) ) {
					$result = 'external';
				} else {
					$result = 'unknown';
				}
				break;
			case 'user-editcount':
				/** @var UserIdentity $userIdentity */
				$userIdentity = $parameters['user-identity'];
				$result = $this->userEditTracker->getUserEditCount( $userIdentity );
				break;
			case 'user-emailconfirm':
				/** @var User $user */
				$user = $parameters['user'];
				$result = $user->getEmailAuthenticationTimestamp();
				break;
			case 'user-groups':
				/** @var UserIdentity $userIdentity */
				$userIdentity = $parameters['user-identity'];
				$result = $this->userGroupManager->getUserEffectiveGroups( $userIdentity );
				break;
			case 'user-rights':
				/** @var UserIdentity $userIdentity */
				$userIdentity = $parameters['user-identity'];
				$result = $this->permissionManager->getUserPermissions( $userIdentity );
				break;
			case 'user-block':
				// @todo Support partial blocks?
				/** @var User $user */
				$user = $parameters['user'];
				$result = (bool)$user->getBlock();
				break;
			case 'user-age':
				/** @var User $user */
				$user = $parameters['user'];
				$asOf = $parameters['asof'];

				if ( !$user->isRegistered() ) {
					$result = 0;
				} else {
					// HACK: If there's no registration date, assume 2008-01-15, Wikipedia Day
					// in the year before the new user log was created. See T243469.
					$registration = $user->getRegistration() ?? "20080115000000";
					$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $registration );
				}
				break;
			case 'page-age':
				/** @var Title $title */
				$title = $parameters['title'];

				$firstRev = $this->revisionLookup->getFirstRevision( $title );
				$firstRevisionTime = $firstRev ? $firstRev->getTimestamp() : null;
				if ( !$firstRevisionTime ) {
					$result = 0;
					break;
				}

				$asOf = $parameters['asof'];
				$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $firstRevisionTime );
				break;
			case 'revision-age-by-id':
				$timestamp = $this->revisionLookup->getTimestampFromId( $parameters['revid'] );
				if ( !$timestamp ) {
					$result = null;
					break;
				}
				$asOf = $parameters['asof'];
				$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $timestamp );
				break;
			case 'revision-age-by-title':
				/** @var Title $title */
				$title = $parameters['title'];
				$revRec = $this->revisionLookup->getRevisionByTitle( $title );
				if ( !$revRec ) {
					$result = null;
					break;
				}
				$asOf = $parameters['asof'];
				$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $revRec->getTimestamp() );
				break;
			case 'previous-revision-age':
				$revRec = $this->revisionLookup->getRevisionById( $parameters['revid'] );
				if ( !$revRec ) {
					$result = null;
					break;
				}
				$prev = $this->revisionLookup->getPreviousRevision( $revRec );
				if ( !$prev ) {
					$result = null;
					break;
				}
				// Unlike the other age computations, 'asof' is optional here and defaults to
				// the timestamp of the given revision.
				$asOf = $parameters['asof'] ?? $revRec->getTimestamp();
				$result = (int)wfTimestamp( TS_UNIX, $asOf ) - (int)wfTimestamp( TS_UNIX, $prev->getTimestamp() );
				break;
			case 'length':
				$s = $getVarCB( $parameters['length-var'] )->toString();
				// Length in bytes, not in characters
				$result = strlen( $s );
				break;
			case 'subtract-int':
				$v1 = $getVarCB( $parameters['val1-var'] )->toInt();
				$v2 = $getVarCB( $parameters['val2-var'] )->toInt();
				$result = $v1 - $v2;
				break;
			case 'content-model-by-id':
				$revRec = $this->revisionLookup->getRevisionById( $parameters['revid'] );
				$result = $this->getContentModelFromRevision( $revRec );
				break;
			case 'revision-text-by-id':
				$revRec = $this->revisionLookup->getRevisionById( $parameters['revid'] );
				$result = $this->textExtractor->revisionToString( $revRec, $parameters['contextUser'] );
				break;
			case 'get-wiki-name':
				$result = $this->wikiID;
				break;
			case 'get-wiki-language':
				$result = $this->contentLanguage->getCode();
				break;
			default:
				// A truthy return value from the hook is treated as "nobody computed this variable"
				if ( $this->hookRunner->onAbuseFilter_computeVariable(
					$varMethod,
					$vars,
					$parameters,
					$result
				) ) {
					throw new UnexpectedValueException( 'Unknown variable compute type ' . $varMethod );
				}
		}

		return $result instanceof AFPData ? $result : AFPData::newFromPHPVar( $result );
	}

	/**
	 * Look up the external links recorded for the page via ExternalLinksLookup,
	 * using a replica database connection.
	 *
	 * @param WikiPage $article
	 * @return array Empty if the page has no ID (a falsy ID short-circuits the lookup)
	 */
	private function getLinksFromDB( WikiPage $article ) {
		$id = $article->getId();
		if ( !$id ) {
			return [];
		}

		return ExternalLinksLookup::getExternalLinksForPage(
			$id,
			$this->lbFactory->getReplicaDatabase(),
			__METHOD__
		);
	}

	/**
	 * The result is cached in the WAN cache for one minute, keyed on the latest revision ID
	 * of the page.
	 *
	 * @todo Move to MW core (T272050)
	 * @param Title $title
	 * @return string[] Usernames of the last 10 (unique) authors from $title
	 */
	private function getLastPageAuthors( Title $title ) {
		if ( !$title->exists() ) {
			return [];
		}

		$fname = __METHOD__;

		return $this->wanCache->getWithSetCallback(
			$this->wanCache->makeKey( 'last-10-authors', 'revision', $title->getLatestRevID() ),
			WANObjectCache::TTL_MINUTE,
			function ( $oldValue, &$ttl, array &$setOpts ) use ( $title, $fname ) {
				$dbr = $this->lbFactory->getReplicaDatabase();

				$setOpts += Database::getCacheSetOptions( $dbr );
				// Get the last 100 edit authors with a trivial query (avoid T116557)
				$revQuery = $this->revisionStore->getQueryInfo();
				$revAuthors = $dbr->newSelectQueryBuilder()
					->tables( $revQuery['tables'] )
					->field( $revQuery['fields']['rev_user_text'] )
					->where( [
						'rev_page' => $title->getArticleID(),
						// TODO Should deleted names be counted in the 10 authors? If yes, this check should
						// be moved inside the foreach
						'rev_deleted' => 0
					] )
					->caller( $fname )
					// Some pages have < 10 authors but many revisions (e.g. bot pages)
					->orderBy( [ 'rev_timestamp', 'rev_id' ], SelectQueryBuilder::SORT_DESC )
					->limit( 100 )
					// Force index per T116557
					->useIndex( [ 'revision' => 'rev_page_timestamp' ] )
					->joinConds( $revQuery['joins'] )
					->fetchFieldValues();
				// Get the last 10 distinct authors within this set of edits
				return self::pickDistinctAuthors( $revAuthors, 10 );
			}
		);
	}

	/**
	 * Collect the first $limit distinct names from $authors, preserving their order.
	 *
	 * @param iterable $authors Author names, most relevant first
	 * @param int $limit Stop once this many distinct names have been collected
	 * @return string[]
	 */
	private static function pickDistinctAuthors( iterable $authors, int $limit ): array {
		$seen = [];
		foreach ( $authors as $author ) {
			$seen[$author] = true;
			if ( count( $seen ) >= $limit ) {
				break;
			}
		}

		// PHP casts numeric-string array keys to integers, so a user named "123" would
		// come back as int 123. Cast back so callers always get strings.
		return array_map( 'strval', array_keys( $seen ) );
	}

	/**
	 * Get the content model of the main slot of a revision.
	 *
	 * @param ?RevisionRecord $revision
	 * @return string The model ID, or an empty string if there is no revision or its
	 *   main slot content is not available
	 */
	private function getContentModelFromRevision( ?RevisionRecord $revision ): string {
		// this is consistent with what is done on various places in RunVariableGenerator
		// and RCVariableGenerator
		if ( $revision === null ) {
			return '';
		}

		// RevisionRecord::getContent() is documented in core as nullable; don't call
		// getModel() on null.
		$content = $revision->getContent( SlotRecord::MAIN, RevisionRecord::RAW );
		return $content !== null ? $content->getModel() : '';
	}

	/**
	 * It's like WikiPage::prepareContentForEdit, but not for editing (old wikitext usually)
	 *
	 * Results are memoized in a static array for the rest of the request.
	 * NOTE(review): the memoization key is built from the wikitext hash and the page title
	 * only; the context user is not part of it.
	 *
	 * @param string $wikitext
	 * @param WikiPage $article
	 * @param UserIdentity $userIdentity Context user
	 *
	 * @return stdClass Object whose only property, 'output', holds the result of the parse
	 */
	private function parseNonEditWikitext( $wikitext, WikiPage $article, UserIdentity $userIdentity ) {
		static $cache = [];

		$cacheKey = md5( $wikitext ) . ':' . $article->getTitle()->getPrefixedText();

		if ( !isset( $cache[$cacheKey] ) ) {
			$options = ParserOptions::newFromUser( $userIdentity );
			$cache[$cacheKey] = (object)[
				'output' => $this->parserFactory->getInstance()->parse( $wikitext, $article->getTitle(), $options )
			];
		}

		return $cache[$cacheKey];
	}
}