config = $configFactory->makeConfig( 'textextracts' );
		$this->cache = $cache;
		$this->langConvFactory = $langConvFactory;
		$this->wikiPageFactory = $wikiPageFactory;
	}

	/**
	 * Main entry point of the module.
	 *
	 * Reads the request parameters, produces an extract for every good title of
	 * the page set (up to the effective limit) and stores each one in the API
	 * result under query.pages.<id>.extract. A 'continue' value is emitted when
	 * the limit is reached or when the result refuses another value.
	 */
	public function execute() {
		$pages = $this->getPageSet()->getGoodTitles();
		if ( $pages === [] ) {
			return;
		}

		// In internal mode or XML output the text is wrapped as [ '*' => text ].
		$main = $this->getMain();
		$isXml = $main->isInternalMode() || $main->getPrinter()->getFormat() == 'XML';

		$result = $this->getResult();
		$this->params = $this->extractRequestParams();
		$params = $this->params;
		$this->requireMaxOneParameter( $params, 'chars', 'sentences' );

		// Whole-page extracts for several titles are served one per request.
		$limit = intval( $params['limit'] );
		if ( $limit > 1 && !$params['intro'] && count( $pages ) > 1 ) {
			$limit = 1;
			$this->addWarning( [ 'apiwarn-textextracts-limit', $limit ] );
		}

		// Resume from the offset handed out by a previous request, if any.
		$offset = 0;
		if ( isset( $params['continue'] ) ) {
			$offset = intval( $params['continue'] );
			$this->dieContinueUsageIf( $offset < 0 || $offset > count( $pages ) );
			$pages = array_slice( $pages, $offset, null, true );
		}

		$processed = 0;
		$sawFilePage = false;
		/** @var Title $title */
		foreach ( $pages as $pageId => $title ) {
			$processed++;
			if ( $processed > $limit ) {
				$this->setContinueEnumParameter( 'continue', $offset + $processed - 1 );
				break;
			}

			if ( !$title->inNamespace( NS_FILE ) ) {
				$options = $this->params;
				$text = $this->truncate( $this->getExtract( $title ) );
				if ( $options['plaintext'] ) {
					$text = $this->doSections( $text );
				} else {
					if ( $options['sentences'] ) {
						$this->addWarning(
							$this->msg( 'apiwarn-textextracts-sentences-and-html', self::PREFIX )
						);
					}
					$this->addWarning( 'apiwarn-textextracts-malformed-html' );
				}
			} else {
				// File pages get an empty extract; the warning is added after the loop.
				$text = '';
				$sawFilePage = true;
			}

			$value = $isXml ? [ '*' => $text ] : $text;
			$fit = $result->addValue( [ 'query', 'pages', $pageId ], 'extract', $value );
			if ( !$fit ) {
				// The value was not accepted: ask the client to continue from this title.
				$this->setContinueEnumParameter( 'continue', $offset + $processed - 1 );
				break;
			}
		}

		if ( $sawFilePage ) {
			$this->addWarning( 'apiwarn-textextracts-title-in-file-namespace' );
		}
	}

	/**
	 * Reports the cache mode of this module; always 'public'.
	 *
	 * @param array $params Ignored parameters
	 * @return string
	 */
	public function getCacheMode( $params ) {
		return 'public';
	}

	/**
	 * Produces the processed (but not yet trimmed) extract for one title.
	 *
	 * Lookup order: the cache entry matching the requested mode, then - for
	 * intro-only requests - the first section of a cached full-page entry, and
	 * finally a fresh parse whose converted output is written to the cache.
	 * Titles with an unsupported content model yield '' plus a warning.
	 *
	 * @param Title $title
	 * @return string
	 */
	private function getExtract( Title $title ) {
		$model = $title->getContentModel();
		if ( !in_array( $model, $this->supportedContentModels, true ) ) {
			$this->addWarning( [
				'apiwarn-textextracts-unsupportedmodel',
				wfEscapeWikiText( $title->getPrefixedText() ),
				$model
			] );
			return '';
		}

		$page = $this->wikiPageFactory->newFromTitle( $title );
		$wantIntro = $this->params['intro'];

		$text = $this->getFromCache( $page, $wantIntro );
		if ( $text === false && $wantIntro ) {
			// No intro entry: try to cut the first section out of a cached full page.
			$fullText = $this->getFromCache( $page, false );
			$text = $fullText === false
				? false
				: $this->getFirstSection( $fullText, $this->params['plaintext'] );
		}
		if ( $text !== false ) {
			return $text;
		}

		// Nothing usable was cached: parse, convert and remember the outcome.
		$text = $this->convertText( $this->parse( $page ) );
		$this->setCache( $page, $text );
		return $text;
	}

	/**
	 * Builds the cache key for a page's extract.
	 *
	 * The key is made from the cache version, the page id and touched
	 * timestamp, the preferred variant of the page language's converter, the
	 * output format (plaintext/html) and the scope (intro/full).
	 *
	 * @param WANObjectCache $cache
	 * @param WikiPage $page
	 * @param bool $introOnly
	 * @return string
	 */
	private function cacheKey( WANObjectCache $cache, WikiPage $page, $introOnly ) {
		$pageLanguage = $page->getTitle()->getPageLanguage();
		$converter = $this->langConvFactory->getLanguageConverter( $pageLanguage );
		$format = $this->params['plaintext'] ? 'plaintext' : 'html';
		$scope = $introOnly ? 'intro' : 'full';

		return $cache->makeKey(
			'textextracts',
			self::CACHE_VERSION,
			$page->getId(),
			$page->getTouched(),
			$converter->getPreferredVariant(),
			$format,
			$scope
		);
	}

	/**
	 * Looks up the cached extract of a page for the given scope.
	 *
	 * @param WikiPage $page
	 * @param bool $introOnly
	 * @return string|false
	 */
	private function getFromCache( WikiPage $page, $introOnly ) {
		// @TODO: replace with getWithSetCallback()
		return $this->cache->get( $this->cacheKey( $this->cache, $page, $introOnly ) );
	}

	/**
	 * Stores an extract in the cache under the key for the current 'intro'
	 * setting, using the configured 'ParserCacheExpireTime' as expiry.
	 *
	 * @param WikiPage $page
	 * @param string $text
	 */
	private function setCache( WikiPage $page, $text ) {
		// @TODO: replace with getWithSetCallback()
		$key = $this->cacheKey( $this->cache, $page, $this->params['intro'] );
		$ttl = $this->getConfig()->get( 'ParserCacheExpireTime' );
		$this->cache->set( $key, $text, $ttl );
	}

	/**
	 * @param string $text
	 * @param bool $plainText
	 * @return string
	 */
	private function getFirstSection( $text, $plainText ) {
		if ( $plainText ) {
			$regexp = '/^.*?(?=' . ExtractFormatter::SECTION_MARKER_START . '(?!.' . ExtractFormatter::SECTION_MARKER_END . '