2011-12-28 01:37:06 +00:00
|
|
|
/**
|
|
|
|
* Command line parse utility: converts wikitext to WikiDom JSON (default) or HTML (--html).
|
|
|
|
* Read from STDIN, write to STDOUT.
|
2012-01-20 02:36:18 +00:00
|
|
|
*
|
|
|
|
* @author Neil Kandalgaonkar <neilk@wikimedia.org>
|
|
|
|
* @author Gabriel Wicke <gwicke@wikimedia.org>
|
2011-12-28 01:37:06 +00:00
|
|
|
*/
|
|
|
|
|
2012-01-21 22:42:54 +00:00
|
|
|
var ParserPipeline = require('./mediawiki.parser.js').ParserPipeline,
|
|
|
|
ParserEnv = require('./mediawiki.parser.environment.js').MWParserEnvironment,
|
|
|
|
DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter,
|
|
|
|
optimist = require('optimist');
|
2011-12-28 01:37:06 +00:00
|
|
|
|
|
|
|
/**
 * Script entry point.
 *
 * Parses the command line options, builds a parser environment and pipeline,
 * buffers all of STDIN, feeds it to the parser and writes the result to
 * STDOUT: WikiDom serialized as indented JSON by default, or the HTML of the
 * document body when --html is given. The process exits with status 0 once
 * the output has been written.
 */
( function() {
	// Command line options.
	var argv = optimist.usage( 'Usage: $0', {
		'html': {
			description: 'Produce html output instead of WikiDom',
			'boolean': true,
			'default': false
		},
		'debug': {
			description: 'Debug mode',
			'boolean': true,
			'default': false
		},
		'maxdepth': {
			description: 'Maximum expansion depth',
			'boolean': false,
			'default': 40
		}
	}).argv;

	var env = new ParserEnv( {
			// fetch templates from enwiki by default..
			wgScriptPath: "http://en.wikipedia.org/w",
			wgScriptExtension: ".php",
			fetchTemplates: true,
			// enable/disable debug output using this switch
			debug: argv.debug,
			maxDepth: argv.maxdepth
		} ),
		parser = new ParserPipeline( env );

	// Start reading STDIN as UTF-8 text.
	process.stdin.resume();
	process.stdin.setEncoding('utf8');

	// Buffer the input; the parser is only started once STDIN has ended.
	var inputChunks = [];
	process.stdin.on( 'data', function( chunk ) {
		inputChunks.push( chunk );
	} );

	process.stdin.on( 'end', function() {
		var input = inputChunks.join('');

		// Emit the result once the pipeline delivers the finished document.
		parser.on('document', function ( document ) {
			var output;
			if ( ! argv.html ) {
				var wikiDom = new DOMConverter().HTMLtoWiki( document.body );
				// Serialize the WikiDom with indentation
				output = JSON.stringify( wikiDom, null, 2 );
			} else {
				// Print out the html
				output = document.body.innerHTML;
			}
			process.stdout.write( output );

			// Add a trailing newline for shell user's benefit, and only exit
			// from the completion callback of this last write: calling
			// process.exit() right after write() can drop output that is
			// still queued when stdout is written asynchronously.
			process.stdout.write( "\n", function () {
				process.exit( 0 );
			} );
		});

		// Kick off the pipeline by feeding the input into the parser pipeline
		parser.parse( input );
	} );
} )();
|