*/ class ApiQueryParseTree extends ApiQueryBase { const paramPrefix = 'pa'; public function __construct( $query, $moduleName ) { parent::__construct( $query, $moduleName, self::paramPrefix ); } /** * Obtain the parse of a wiki article */ public function execute() { $titles = $this->getPageSet()->getGoodTitles(); if ( count( $titles ) == 0 ) { return; } $params = $this->extractRequestParams(); foreach( $titles as $title ) { $pageId = $title->getArticleID(); if ( ! $title->hasSourceText() ) { $results = array( 'errors' => 'Title does not exist' ); } else if ( ! $title->userCan( 'read' ) ) { $results = array( 'errors' => 'Cannot read this title' ); } else { # Get the article text $rev = Revision::newFromTitle( $title ); if ( !is_object( $rev ) ){ $results = array( 'errors' => 'Could not get revision' ); } else { $text = $rev->getText(); $results['contents'] = $this->getParseTree( $text ); } } foreach ( $results as $elementName => $item ) { $fit = $this->addPageSubItem( $pageId, $item, $elementName ); if ( ! $fit ) { /* ?? this should never happen */ } } } } /** * Execute a shell command to obtain the parse tree of a string of wikitext * * @param String wikitext * @return Array ret Array of two keys, 'contents' with script output, 'errors' with errors if any. */ public function getParseTree( $text ) { global $wgVisualEditorParserCmd, $wgServer, $wgScriptPath; // TODO This is probably not very portable to non-Unix OS. There's a lot of code in wfShellExec to make // that portable and secure on Windows etc. In any case the existing PHP-based parser calls out to an external script to do // HTML-tidy anyway, in a similar manner, without any shell escapes... $cmd = join( " ", array( $wgVisualEditorParserCmd, '--wgScriptPath', wfEscapeShellArg( $wgServer . $wgScriptPath ), ) ); $parserIn = 0; $parserOut = 1; $parserErr = 2; $descriptorSpec = array( $parserIn => array( 'pipe', 'r' ), // child reads from stdin, we write $parserOut => array( 'pipe', 'w' ), // child writes to stdout, we read $parserErr => array( 'pipe', 'w' ) // child writes to stderr, we read ); $pipes = array(); $process = proc_open( $cmd, $descriptorSpec, $pipes ); $ret = array( 'contents' => null ); if ( is_resource( $process ) ) { // Theoretically, this style of communication could cause a deadlock // here. If the stdout buffer fills up, then writes to stdin could // block. // // TODO This might be an opportune time to give the parser all the subtemplates it's going to need, // such that we can. The DB does have a mapping of title to templates used. // Would need a convention for indicating multiple files in the text we write to the parser, like mime multipart fwrite( $pipes[$parserIn], $text ); fclose( $pipes[$parserIn] ); $ret['contents'] = FormatJson::decode( self::readFilehandle( $pipes[$parserOut] ) ); $errors = self::readFilehandle( $pipes[$parserErr] ); if ( $errors != '' and $errors != null ) { $ret['errors'] = $errors; } $retval = proc_close( $process ); if ( $retval != 0 ) { wfWarn( "Parser process returned $retval" ); if ( !$ret['errors'] ) { $ret['errors'] = 'External parser process returned error code, check server logs'; } } } else { wfWarn( "Unable to start external parser process" ); $ret['errors'] = "Unable to start external parser process, check server logs"; } return $ret; } /** * Read filehandle until done. XXX May block! * @param Filehandle $fh * @return String contents of filehandle until EOF */ public static function readFilehandle( $fh ) { $contents = ''; // appends are probably slow, replace with array & join? while ( !feof( $fh ) ) { $contents .= fgets( $fh, 1024 ); } fclose( $fh ); return $contents; } public function getCacheMode( $params ) { return 'public'; } public function getAllowedParams() { return array( 'structure' => array( ApiBase::PARAM_TYPE => array( 'wikidom' ), ApiBase::PARAM_DFLT => 'wikidom', ApiBase::PARAM_ISMULTI => false, ), ); } public function getParamDescription() { return array( 'structure' => 'How to structure the parse tree.' ); } public function getDescription() { return 'Returns parse tree of the given page(s)'; } public function getExamples() { return array( 'Get a parse tree of the [[Main Page]]:', ' api.php?action=query&prop=parsetree&titles=Main%20Page', '', 'Get a parse tree with a specific structure:', ' api.php?action=query&prop=parsetree&' . self::paramPrefix .'structure=wikidom&titles=Main%20Page', ); } public function getVersion() { return __CLASS__ . ': $Id: $'; } // TODO errors! }