assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->options = $options;
		$this->categoryManager = $categoryManager;
		$this->dbLoadBalancerFactory = $dbLoadBalancerFactory;
	}

	/**
	 * Get a connection of the requested type from the main load balancer.
	 *
	 * @param int $mode DB_PRIMARY or DB_REPLICA
	 * @return IDatabase
	 */
	public function getDBConnectionRef( int $mode ): IDatabase {
		return $this->dbLoadBalancerFactory->getMainLB()->getConnection( $mode );
	}

	/**
	 * Get a replica connection from the load balancer factory.
	 *
	 * @return IReadableDatabase
	 */
	public function getReplicaDBConnection(): IReadableDatabase {
		return $this->dbLoadBalancerFactory->getReplicaDatabase();
	}

	/**
	 * Get a specific LintError by id
	 *
	 * @param int $id linter_id
	 * @return bool|LintError false if no row matches, or if the row's category is unknown
	 */
	public function getFromId( int $id ) {
		$row = $this->getReplicaDBConnection()->newSelectQueryBuilder()
			->select( [ 'linter_cat', 'linter_params', 'linter_start', 'linter_end' ] )
			->from( 'linter' )
			->where( [ 'linter_id' => $id ] )
			->caller( __METHOD__ )
			->fetchRow();

		if ( !$row ) {
			return false;
		}

		// linter_id is not part of the selected fields, so restore it from the lookup key
		$row->linter_id = $id;
		return self::makeLintError( $this->categoryManager, $row );
	}

	/**
	 * Turn a database row into a LintError object
	 *
	 * @param CategoryManager $categoryManager
	 * @param stdClass $row must provide linter_id, linter_cat, linter_start,
	 *  linter_end and linter_params
	 * @return LintError|bool false on error (the category id has no known name; this is logged)
	 */
	public static function makeLintError( CategoryManager $categoryManager, $row ) {
		try {
			$name = $categoryManager->getCategoryName( $row->linter_cat );
		} catch ( MissingCategoryException $e ) {
			LoggerFactory::getInstance( 'Linter' )->error(
				'Could not find name for id: {linter_cat}',
				[ 'linter_cat' => $row->linter_cat ]
			);
			return false;
		}

		return new LintError(
			$name,
			[ (int)$row->linter_start, (int)$row->linter_end ],
			$row->linter_params,
			$row->linter_cat,
			(int)$row->linter_id
		);
	}

	/**
	 * Get all the lint errors for a page
	 *
	 * @param int $pageId
	 * @return LintError[] keyed by LintError::id(); rows whose category is unknown are skipped
	 */
	public function getForPage( int $pageId ) {
		$rows = $this->getReplicaDBConnection()->newSelectQueryBuilder()
			->select( [ 'linter_id', 'linter_cat', 'linter_start', 'linter_end', 'linter_params' ] )
			->from( 'linter' )
			->where( [ 'linter_page' => $pageId ] )
			->caller( __METHOD__ )
			->fetchResultSet();

		$result = [];
		foreach ( $rows as $row ) {
			$error = self::makeLintError( $this->categoryManager, $row );
			if ( !$error ) {
				continue;
			}
			$result[$error->id()] = $error;
		}

		return $result;
	}

	/**
	 * Convert a LintError object into an array for
	 * inserting/querying in the database
	 *
	 * @param int $pageId
	 * @param int $namespaceId
	 * @param LintError $error
	 * @return array field name => value for one row of the linter table
	 */
	private function buildErrorRow( int $pageId, int $namespaceId, LintError $error ) {
		$result = [
			'linter_page' => $pageId,
			'linter_cat' => $this->categoryManager->getCategoryId( $error->category, $error->catId ),
			'linter_params' => FormatJson::encode( $error->params, false, FormatJson::ALL_OK ),
			'linter_start' => $error->location[ 0 ],
			'linter_end' => $error->location[ 1 ],
			'linter_namespace' => $namespaceId
		];

		// Array-shaped template info is reduced to a single string: either the
		// multi-part marker or the template name (empty when there is none).
		$templateInfo = $error->templateInfo ?? '';
		if ( is_array( $templateInfo ) ) {
			if ( isset( $templateInfo[ 'multiPartTemplateBlock' ] ) ) {
				$templateInfo = 'multi-part-template-block';
			} else {
				$templateInfo = $templateInfo[ 'name' ] ?? '';
			}
		}
		// Truncate by bytes without splitting a multibyte character
		$templateInfo = mb_strcut( $templateInfo, 0, self::MAX_TEMPLATE_LENGTH );
		$result[ 'linter_template' ] = $templateInfo;

		$tagInfo = $error->tagInfo ?? '';
		$tagInfo = mb_strcut( $tagInfo, 0, self::MAX_TAG_LENGTH );
		$result[ 'linter_tag' ] = $tagInfo;

		return $result;
	}

	/**
	 * Count the given errors per category name.
	 *
	 * @param LintError[] $errors
	 * @return array category name => number of errors in that category
	 */
	private function countByCat( array $errors ) {
		$count = [];
		foreach ( $errors as $error ) {
			$count[$error->category] = ( $count[$error->category] ?? 0 ) + 1;
		}
		return $count;
	}

	/**
	 * Save the specified lint errors in the
	 * database
	 *
	 * The stored errors for the page are diffed against $errors by LintError::id():
	 * stored errors that are no longer reported are deleted, newly reported ones
	 * are inserted, and errors present on both sides are left untouched.
	 *
	 * @param int $pageId
	 * @param int $namespaceId
	 * @param LintError[] $errors
	 * @return array [ 'deleted' => [ cat => count ], 'added' => [ cat => count ] ]
	 */
	public function setForPage( int $pageId, int $namespaceId, $errors ) {
		$previous = $this->getForPage( $pageId );
		if ( !$previous && !$errors ) {
			// Nothing stored and nothing to store, so the primary database is not needed
			return [ 'deleted' => [], 'added' => [] ];
		}

		$dbw = $this->getDBConnectionRef( DB_PRIMARY );
		if ( !$previous ) {
			$toInsert = array_values( $errors );
			$toDelete = [];
		} elseif ( !$errors ) {
			// Every stored error is gone: drop all rows of the page in a single query
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'linter' )
				->where( [ 'linter_page' => $pageId ] )
				->caller( __METHOD__ )
				->execute();
			return [ 'deleted' => $this->countByCat( $previous ), 'added' => [] ];
		} else {
			$toInsert = [];
			$toDelete = $previous;
			// Diff previous and errors
			foreach ( $errors as $error ) {
				$uniqueId = $error->id();
				if ( isset( $previous[$uniqueId] ) ) {
					unset( $toDelete[$uniqueId] );
				} else {
					$toInsert[] = $error;
				}
			}
		}

		if ( $toDelete ) {
			$ids = [];
			foreach ( $toDelete as $lintError ) {
				if ( $lintError->lintId ) {
					$ids[] = $lintError->lintId;
				}
			}
			// An empty ID list cannot form a valid IN() condition, so skip the query then
			if ( $ids ) {
				$dbw->newDeleteQueryBuilder()
					->deleteFrom( 'linter' )
					->where( [ 'linter_id' => $ids ] )
					->caller( __METHOD__ )
					->execute();
			}
		}

		if ( $toInsert ) {
			// Insert into db, ignoring any duplicate key errors
			// since they're the same lint error
			$dbw->newInsertQueryBuilder()
				->insertInto( 'linter' )
				->ignore()
				->rows(
					array_map( function ( LintError $error ) use ( $pageId, $namespaceId ) {
						return $this->buildErrorRow( $pageId, $namespaceId, $error );
					}, $toInsert )
				)
				->caller( __METHOD__ )
				->execute();
		}

		return [
			'deleted' => $this->countByCat( $toDelete ),
			'added' => $this->countByCat( $toInsert ),
		];
	}

	/**
	 * Get an estimate of how many rows are there for the
	 * specified category with EXPLAIN SELECT COUNT(*).
	 * If the category actually has no rows, then 0 will
	 * be returned.
	 *
	 * @param int $catId
	 * @return int exact count up to MAX_ACCURATE_COUNT, an estimate above that
	 */
	private function getTotalsEstimate( $catId ) {
		$dbr = $this->getReplicaDBConnection();
		// First see if there are no rows, or a moderate number
		// within the limit specified by the MAX_ACCURATE_COUNT.
		// The distinction between 0, a few and a lot is important
		// to determine first, as estimateRowCount seem to never
		// return 0 or accurate low error counts.
		$rows = $dbr->newSelectQueryBuilder()
			->select( '*' )
			->from( 'linter' )
			->where( [ 'linter_cat' => $catId ] )
			// Select 1 more so we can see if we're over the max limit
			->limit( self::MAX_ACCURATE_COUNT + 1 )
			->caller( __METHOD__ )
			->fetchRowCount();
		// Return an accurate count if the number of errors is
		// below the maximum accurate count limit
		if ( $rows <= self::MAX_ACCURATE_COUNT ) {
			return $rows;
		}
		// Now we can just estimate if the maximum accurate count limit
		// was returned, which isn't the actual count but the limit reached
		return $dbr->newSelectQueryBuilder()
			->select( '*' )
			->from( 'linter' )
			->where( [ 'linter_cat' => $catId ] )
			->caller( __METHOD__ )
			->estimateRowCount();
	}

	/**
	 * This uses COUNT(*), which is accurate, but can be significantly
	 * slower depending upon how many rows are in the database.
*
	 * @param int $pageId
	 * @return int[] count per visible category name; visible categories without rows are 0
	 */
	public function getTotalsForPage( int $pageId ): array {
		$dbr = $this->getReplicaDBConnection();
		$countRows = $dbr->newSelectQueryBuilder()
			->select( [ 'linter_cat', 'COUNT(*) AS count' ] )
			->from( 'linter' )
			->where( [ 'linter_page' => $pageId ] )
			->caller( __METHOD__ )
			->groupBy( 'linter_cat' )
			->fetchResultSet();

		// Start every visible category at zero so absent ones are still reported
		$visible = $this->categoryManager->getVisibleCategories();
		$totals = array_fill_keys( $visible, 0 );

		foreach ( $countRows as $countRow ) {
			try {
				$name = $this->categoryManager->getCategoryName( $countRow->linter_cat );
			} catch ( MissingCategoryException $e ) {
				// Category ids without a known name are left out of the result
				continue;
			}

			// Hidden categories are filtered here rather than in the query itself
			if ( in_array( $name, $visible, true ) ) {
				$totals[$name] = (int)$countRow->count;
			}
		}

		return $totals;
	}

	/**
	 * Get the (possibly estimated) number of errors for each visible category.
	 *
	 * @return int[] visible category name => total
	 */
	public function getTotals() {
		$manager = $this->categoryManager;
		$totals = [];
		foreach ( $manager->getVisibleCategories() as $categoryName ) {
			$categoryId = $manager->getCategoryId( $categoryName );
			$totals[$categoryName] = $this->getTotalsEstimate( $categoryId );
		}
		return $totals;
	}

	/**
	 * This code migrates namespace ID identified by the Linter records linter_page
	 * field and populates the new linter_namespace field if it is unpopulated.
	 * This code is intended to be run once though it could be run multiple times
	 * using `--force` if needed via the maintenance script.
	 * It is safe to run more than once, and will quickly exit if no records need updating.
*
	 * @param int $pageBatchSize maximum number of page IDs looked up in the page table
	 *  per query; values below 1 are treated as 1
	 * @param int $linterBatchSize maximum number of distinct linter_page values read per pass
	 * @param int $sleep seconds to sleep after each page batch; negative values are treated as 0
	 * @return int sum of the affected rows of all updates, i.e. the number of linter
	 *  records whose linter_namespace was populated
	 */
	public function migrateNamespace( int $pageBatchSize, int $linterBatchSize, int $sleep ): int {
		// A non-positive page batch size could never advance through the page list
		$pageBatchSize = max( 1, $pageBatchSize );
		$sleep = max( 0, $sleep );

		$logger = LoggerFactory::getInstance( 'Linter' );
		$lbFactory = $this->dbLoadBalancerFactory;
		$dbw = $this->getDBConnectionRef( DB_PRIMARY );
		$dbread = $this->getDBConnectionRef( DB_REPLICA );

		$logger->info( "Migrate namespace starting\n" );

		$updated = 0;
		$lastElement = 0;
		do {
			// Gather some unique pageId values in linter table records into an array
			$linterPages = [];
			$result = $dbw->newSelectQueryBuilder()
				->select( 'linter_page' )
				->distinct()
				->from( 'linter' )
				->where( $dbw->expr( 'linter_page', '>', $lastElement ) )
				->andWhere( [ 'linter_namespace' => null ] )
				->orderBy( 'linter_page' )
				->limit( $linterBatchSize )
				->caller( __METHOD__ )
				->fetchResultSet();

			foreach ( $result as $row ) {
				// Remember the highest page ID seen so the next pass continues after it
				$lastElement = intval( $row->linter_page );
				$linterPages[] = $lastElement;
			}

			foreach ( array_chunk( $linterPages, $pageBatchSize ) as $pageIdBatch ) {
				$pageResults = $dbread->newSelectQueryBuilder()
					->select( [ 'page_id', 'page_namespace' ] )
					->from( 'page' )
					->where( [ 'page_id' => $pageIdBatch ] )
					->caller( __METHOD__ )
					->fetchResultSet();

				foreach ( $pageResults as $pageRow ) {
					$pageId = intval( $pageRow->page_id );
					$namespaceId = intval( $pageRow->page_namespace );

					// If a record about to be updated has been removed by another process,
					// the update will not error, and continue updating the existing records.
					$dbw->newUpdateQueryBuilder()
						->update( 'linter' )
						->set( [ 'linter_namespace' => $namespaceId ] )
						->where( [ 'linter_namespace' => null, 'linter_page' => $pageId ] )
						->caller( __METHOD__ )
						->execute();
					$updated += $dbw->affectedRows();
				}

				// Sleep between batches for replication to catch up
				$lbFactory->waitForReplication();
				if ( $sleep > 0 ) {
					sleep( $sleep );
				}
			}

			$logger->info( 'Migrated ' . $updated . " page IDs\n" );
		} while ( $linterPages !== [] );

		$logger->info( "Migrate namespace finished!\n" );

		return $updated;
	}

	/**
	 * This code migrates the content of Linter record linter_params to linter_template and
	 * linter_tag fields if they are unpopulated or stale.
	 * This code should only be run once and thereafter disabled but must run to completion.
	 * It can be restarted if interrupted and will pick up where new divergences are found.
	 * Note: When linter_params are not set, the content is set to '[]' indicating no content
	 * and the code also handles a null linter_params field if found.
* This code is only run once by maintenance script migrateTagTemplate.php
	 *
	 * @param int $batchSize maximum number of linter rows read per pass
	 * @param int $sleep seconds to sleep after each pass; negative values are treated as 0
	 * @return int sum of the affected rows of all updates
	 */
	public function migrateTemplateAndTagInfo( int $batchSize, int $sleep ): int {
		if ( $sleep < 0 ) {
			$sleep = 0;
		}

		$logger = LoggerFactory::getInstance( 'Linter' );
		$lbFactory = $this->dbLoadBalancerFactory;
		$dbw = $this->getDBConnectionRef( DB_PRIMARY );

		$logger->info( "Migration of linter_params field to linter_tag and linter_template fields starting\n" );

		$updated = 0;
		$lastElement = 0;
		do {
			$results = $dbw->newSelectQueryBuilder()
				->select( [ 'linter_id', 'linter_params', 'linter_template', 'linter_tag' ] )
				->from( 'linter' )
				->where( [
					$dbw->expr( "linter_params", '!=', '[]' ),
					$dbw->expr( "linter_params", '!=', null ),
					$dbw->expr( "linter_id", '>', $lastElement )
				] )
				->orderBy( 'linter_id', SelectQueryBuilder::SORT_ASC )
				->limit( $batchSize )
				->caller( __METHOD__ )
				->fetchResultSet();

			$linterBatchLength = 0;
			foreach ( $results as $row ) {
				$linter_id = intval( $row->linter_id );
				// Remember the highest id seen so the next pass continues after it
				$lastElement = $linter_id;

				$linter_params = FormatJson::decode( $row->linter_params );

				$templateInfo = $linter_params->templateInfo ?? '';
				if ( is_object( $templateInfo ) ) {
					if ( isset( $templateInfo->multiPartTemplateBlock ) ) {
						$templateInfo = 'multi-part-template-block';
					} else {
						$templateInfo = $templateInfo->name ?? '';
					}
				}
				// Malformed params may decode to a non-scalar here; mb_strcut() needs a string,
				// so treat those as "no template" instead of aborting the whole migration.
				$templateInfo = is_scalar( $templateInfo ) ? (string)$templateInfo : '';
				$templateInfo = mb_strcut( $templateInfo, 0, self::MAX_TEMPLATE_LENGTH );

				$tagInfo = $linter_params->name ?? '';
				$tagInfo = is_scalar( $tagInfo ) ? (string)$tagInfo : '';
				$tagInfo = mb_strcut( $tagInfo, 0, self::MAX_TAG_LENGTH );

				// compare the content of linter_params to the template and tag field contents
				// and if they diverge, update the field with the correct template and tag info.
				// This behavior allows this function to be restarted should it be interrupted
				// and avoids repeating database record updates that are already correct due to
				// having been populated when the error record was created with the new recordLintError
				// write code that populates the template and tag fields, or for records populated
				// during a previous but interrupted run of this migrate code.
				// NOTE(review): the comparison is loose, so a NULL column and '' count as equal;
				// presumably intended - confirm before tightening it.
				if ( $templateInfo != $row->linter_template || $tagInfo != $row->linter_tag ) {
					// If the record about to be updated has been removed by another process,
					// the update will not do anything and just return with no records updated.
					$dbw->newUpdateQueryBuilder()
						->update( 'linter' )
						->set( [
							'linter_template' => $templateInfo,
							'linter_tag' => $tagInfo,
						] )
						->where( [ 'linter_id' => $linter_id ] )
						->caller( __METHOD__ )
						->execute();
					$updated += $dbw->affectedRows();
				}
				$linterBatchLength++;
			}

			// Sleep between batches for replication to catch up
			$lbFactory->waitForReplication();
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}

			$logger->info( 'Migrated ' . $updated . " linter IDs\n" );
		} while ( $linterBatchLength > 0 );

		$logger->info( "Migrate linter_params to linter_tag and linter_template fields finished!\n" );

		return $updated;
	}
}