mediawiki-extensions-Visual.../api/ApiQueryParseTree.php
2012-01-31 00:10:03 +00:00

181 lines
5 KiB
PHP

<?php
/**
* API module to parse wikitext articles using the new parser, which as of early 2012 is node.js-based.
*
* @file ApiQueryParseTree.php
*
* @license GNU GPL v2 or later
* @author Neil Kandalgaonkar <neilk@wikimedia.org>
*/
class ApiQueryParseTree extends ApiQueryBase {
const paramPrefix = 'pa';
public function __construct( $query, $moduleName ) {
parent::__construct( $query, $moduleName, self::paramPrefix );
}
/**
* Obtain the parse of a wiki article
*/
public function execute() {
$titles = $this->getPageSet()->getGoodTitles();
if ( count( $titles ) == 0 ) {
return;
}
$params = $this->extractRequestParams();
foreach( $titles as $title ) {
$pageId = $title->getArticleID();
if ( ! $title->hasSourceText() ) {
$results = array( 'errors' => 'Title does not exist' );
} else if ( ! $title->userCan( 'read' ) ) {
$results = array( 'errors' => 'Cannot read this title' );
} else {
# Get the article text
$rev = Revision::newFromTitle( $title );
if ( !is_object( $rev ) ){
$results = array( 'errors' => 'Could not get revision' );
} else {
$text = $rev->getText();
$results['contents'] = $this->getParseTree( $text );
}
}
foreach ( $results as $elementName => $item ) {
$fit = $this->addPageSubItem( $pageId, $item, $elementName );
if ( ! $fit ) {
/* ?? this should never happen */
}
}
}
}
/**
* Execute a shell command to obtain the parse tree of a string of wikitext
*
* @param String wikitext
* @return Array ret Array of two keys, 'contents' with script output, 'errors' with errors if any.
*/
public function getParseTree( $text ) {
global $wgVisualEditorParserCmd, $wgServer, $wgScriptPath;
// TODO This is probably not very portable to non-Unix OS. There's a lot of code in wfShellExec to make
// that portable and secure on Windows etc. In any case the existing PHP-based parser calls out to an external script to do
// HTML-tidy anyway, in a similar manner, without any shell escapes...
$cmd = join( " ", array(
$wgVisualEditorParserCmd,
'--wgScriptPath', wfEscapeShellArg( $wgServer . $wgScriptPath ),
) );
$parserIn = 0;
$parserOut = 1;
$parserErr = 2;
$descriptorSpec = array(
$parserIn => array( 'pipe', 'r' ), // child reads from stdin, we write
$parserOut => array( 'pipe', 'w' ), // child writes to stdout, we read
$parserErr => array( 'pipe', 'w' ) // child writes to stderr, we read
);
$pipes = array();
$process = proc_open( $cmd, $descriptorSpec, $pipes );
$ret = array( 'contents' => null );
if ( is_resource( $process ) ) {
// Theoretically, this style of communication could cause a deadlock
// here. If the stdout buffer fills up, then writes to stdin could
// block.
//
// TODO This might be an opportune time to give the parser all the subtemplates it's going to need,
// such that we can. The DB does have a mapping of title to templates used.
// Would need a convention for indicating multiple files in the text we write to the parser, like mime multipart
fwrite( $pipes[$parserIn], $text );
fclose( $pipes[$parserIn] );
$ret['contents'] = FormatJson::decode( self::readFilehandle( $pipes[$parserOut] ) );
$errors = self::readFilehandle( $pipes[$parserErr] );
if ( $errors != '' and $errors != null ) {
$ret['errors'] = $errors;
}
$retval = proc_close( $process );
if ( $retval != 0 ) {
wfWarn( "Parser process returned $retval" );
if ( !$ret['errors'] ) {
$ret['errors'] = 'External parser process returned error code, check server logs';
}
}
} else {
wfWarn( "Unable to start external parser process" );
$ret['errors'] = "Unable to start external parser process, check server logs";
}
return $ret;
}
/**
* Read filehandle until done. XXX May block!
* @param Filehandle $fh
* @return String contents of filehandle until EOF
*/
public static function readFilehandle( $fh ) {
$contents = '';
// appends are probably slow, replace with array & join?
while ( !feof( $fh ) ) {
$contents .= fgets( $fh, 1024 );
}
fclose( $fh );
return $contents;
}
public function getCacheMode( $params ) {
return 'public';
}
public function getAllowedParams() {
return array(
'structure' => array(
ApiBase::PARAM_TYPE => array( 'wikidom' ),
ApiBase::PARAM_DFLT => 'wikidom',
ApiBase::PARAM_ISMULTI => false,
),
);
}
public function getParamDescription() {
return array(
'structure' => 'How to structure the parse tree.'
);
}
public function getDescription() {
return 'Returns parse tree of the given page(s)';
}
public function getExamples() {
return array(
'Get a parse tree of the [[Main Page]]:',
' api.php?action=query&prop=parsetree&titles=Main%20Page',
'',
'Get a parse tree with a specific structure:',
' api.php?action=query&prop=parsetree&' . self::paramPrefix .'structure=wikidom&titles=Main%20Page',
);
}
public function getVersion() {
return __CLASS__ . ': $Id: $';
}
// TODO errors!
}